{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997382884061764, "eval_steps": 500, "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010468463752944255, "grad_norm": 12.233881208288006, "learning_rate": 5.208333333333333e-09, "logits/chosen": 5322.43310546875, "logits/rejected": 5816.03515625, "logps/chosen": -283.18994140625, "logps/rejected": -275.79241943359375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.010468463752944255, "grad_norm": 16.989280562459886, "learning_rate": 5.208333333333333e-08, "logits/chosen": 5120.96728515625, "logits/rejected": 4410.78564453125, "logps/chosen": -250.2166290283203, "logps/rejected": -240.45982360839844, "loss": 0.6932, "rewards/accuracies": 0.4236111044883728, "rewards/chosen": 0.00023470280575565994, "rewards/margins": 0.0004972594324499369, "rewards/rejected": -0.000262556568486616, "step": 10 }, { "epoch": 0.02093692750588851, "grad_norm": 10.887636563087568, "learning_rate": 1.0416666666666667e-07, "logits/chosen": 5454.58837890625, "logits/rejected": 4964.83203125, "logps/chosen": -268.21044921875, "logps/rejected": -243.5047149658203, "loss": 0.693, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0003735010395757854, "rewards/margins": 0.0005810990696772933, "rewards/rejected": -0.00020759805920533836, "step": 20 }, { "epoch": 0.031405391258832765, "grad_norm": 11.5856361626738, "learning_rate": 1.5624999999999999e-07, "logits/chosen": 5136.9482421875, "logits/rejected": 4822.50341796875, "logps/chosen": -236.8883514404297, "logps/rejected": -273.16864013671875, "loss": 0.6922, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0004290867655072361, "rewards/margins": 0.0023582351859658957, "rewards/rejected": -0.002787322038784623, "step": 30 }, { "epoch": 0.04187385501177702, "grad_norm": 26.154849694112038, "learning_rate": 2.0833333333333333e-07, "logits/chosen": 6112.22119140625, "logits/rejected": 5175.8818359375, "logps/chosen": -301.0946044921875, "logps/rejected": -269.2663269042969, "loss": 0.69, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.0013691156636923552, "rewards/margins": 0.005987053271383047, "rewards/rejected": -0.004617937840521336, "step": 40 }, { "epoch": 0.05234231876472128, "grad_norm": 11.472776003460595, "learning_rate": 2.604166666666667e-07, "logits/chosen": 5646.0126953125, "logits/rejected": 5320.97705078125, "logps/chosen": -273.63555908203125, "logps/rejected": -245.958251953125, "loss": 0.6852, "rewards/accuracies": 0.71875, "rewards/chosen": 0.016654713079333305, "rewards/margins": 0.017900904640555382, "rewards/rejected": -0.0012461928417906165, "step": 50 }, { "epoch": 0.06281078251766553, "grad_norm": 11.964657947052096, "learning_rate": 3.1249999999999997e-07, "logits/chosen": 5076.138671875, "logits/rejected": 4593.20263671875, "logps/chosen": -238.00473022460938, "logps/rejected": -225.4451446533203, "loss": 0.679, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.03560272231698036, "rewards/margins": 0.028335992246866226, "rewards/rejected": 0.00726673286408186, "step": 60 }, { "epoch": 0.07327924627060979, "grad_norm": 12.066256405857127, "learning_rate": 3.645833333333333e-07, "logits/chosen": 5743.9794921875, "logits/rejected": 5056.69677734375, "logps/chosen": -273.55438232421875, "logps/rejected": -281.1863098144531, "loss": 0.6678, "rewards/accuracies": 0.65625, "rewards/chosen": 0.03722140192985535, "rewards/margins": 0.05429575592279434, "rewards/rejected": -0.017074348405003548, "step": 70 }, { "epoch": 0.08374771002355404, "grad_norm": 11.889136757243032, "learning_rate": 4.1666666666666667e-07, "logits/chosen": 6238.6103515625, "logits/rejected": 4811.1123046875, "logps/chosen": -296.5029296875, "logps/rejected": -269.83697509765625, "loss": 0.655, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.028454985469579697, "rewards/margins": 0.11004779487848282, "rewards/rejected": -0.08159281313419342, "step": 80 }, { "epoch": 0.0942161737764983, "grad_norm": 18.902525693355866, "learning_rate": 4.6874999999999996e-07, "logits/chosen": 6128.4267578125, "logits/rejected": 5638.568359375, "logps/chosen": -285.3127136230469, "logps/rejected": -279.41156005859375, "loss": 0.6383, "rewards/accuracies": 0.71875, "rewards/chosen": -0.04132311791181564, "rewards/margins": 0.14937947690486908, "rewards/rejected": -0.19070258736610413, "step": 90 }, { "epoch": 0.10468463752944256, "grad_norm": 23.592724134950643, "learning_rate": 4.999732492681437e-07, "logits/chosen": 6226.07421875, "logits/rejected": 5436.36767578125, "logps/chosen": -308.2719421386719, "logps/rejected": -306.774658203125, "loss": 0.6174, "rewards/accuracies": 0.65625, "rewards/chosen": -0.16117054224014282, "rewards/margins": 0.16605570912361145, "rewards/rejected": -0.3272262513637543, "step": 100 }, { "epoch": 0.11515310128238682, "grad_norm": 18.215929113599582, "learning_rate": 4.996723692767926e-07, "logits/chosen": 6454.75634765625, "logits/rejected": 5713.9111328125, "logps/chosen": -305.07763671875, "logps/rejected": -312.5370178222656, "loss": 0.6195, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23803937435150146, "rewards/margins": 0.20612268149852753, "rewards/rejected": -0.4441620707511902, "step": 110 }, { "epoch": 0.12562156503533106, "grad_norm": 17.779853029499233, "learning_rate": 4.990375746213598e-07, "logits/chosen": 5647.10498046875, "logits/rejected": 5032.18115234375, "logps/chosen": -280.08380126953125, "logps/rejected": -302.64801025390625, "loss": 0.6002, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.20821234583854675, "rewards/margins": 0.2683521807193756, "rewards/rejected": -0.4765644967556, "step": 120 }, { "epoch": 0.1360900287882753, "grad_norm": 19.50880449097703, "learning_rate": 4.980697142834314e-07, "logits/chosen": 6471.6474609375, "logits/rejected": 4744.6669921875, "logps/chosen": -332.22821044921875, "logps/rejected": -327.31085205078125, "loss": 0.5715, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.3157106041908264, "rewards/margins": 0.419821172952652, "rewards/rejected": -0.7355316877365112, "step": 130 }, { "epoch": 0.14655849254121958, "grad_norm": 25.85535641752715, "learning_rate": 4.967700826904229e-07, "logits/chosen": 6072.1328125, "logits/rejected": 5260.83349609375, "logps/chosen": -318.0292663574219, "logps/rejected": -324.5956726074219, "loss": 0.5832, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4499703347682953, "rewards/margins": 0.34283071756362915, "rewards/rejected": -0.792801022529602, "step": 140 }, { "epoch": 0.15702695629416383, "grad_norm": 26.628753832444012, "learning_rate": 4.951404179843962e-07, "logits/chosen": 6003.0, "logits/rejected": 5400.46923828125, "logps/chosen": -325.4483642578125, "logps/rejected": -369.15814208984375, "loss": 0.5619, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4490388035774231, "rewards/margins": 0.4607365131378174, "rewards/rejected": -0.9097753763198853, "step": 150 }, { "epoch": 0.16749542004710807, "grad_norm": 25.873571320131013, "learning_rate": 4.931828996974498e-07, "logits/chosen": 5745.4921875, "logits/rejected": 4877.51220703125, "logps/chosen": -316.9101257324219, "logps/rejected": -339.6864013671875, "loss": 0.6081, "rewards/accuracies": 0.71875, "rewards/chosen": -0.41086092591285706, "rewards/margins": 0.40860843658447266, "rewards/rejected": -0.8194693326950073, "step": 160 }, { "epoch": 0.17796388380005235, "grad_norm": 22.873805939874973, "learning_rate": 4.909001458367866e-07, "logits/chosen": 5762.07373046875, "logits/rejected": 5318.44775390625, "logps/chosen": -317.30718994140625, "logps/rejected": -352.52178955078125, "loss": 0.5765, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4623674750328064, "rewards/margins": 0.4159014821052551, "rewards/rejected": -0.8782688975334167, "step": 170 }, { "epoch": 0.1884323475529966, "grad_norm": 30.21865790125414, "learning_rate": 4.882952093833627e-07, "logits/chosen": 6149.8720703125, "logits/rejected": 5386.2421875, "logps/chosen": -366.20965576171875, "logps/rejected": -386.5586853027344, "loss": 0.5682, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7474745512008667, "rewards/margins": 0.49005889892578125, "rewards/rejected": -1.237533450126648, "step": 180 }, { "epoch": 0.19890081130594087, "grad_norm": 29.175279577033287, "learning_rate": 4.853715742087946e-07, "logits/chosen": 6069.8681640625, "logits/rejected": 5187.52587890625, "logps/chosen": -359.46783447265625, "logps/rejected": -392.0047912597656, "loss": 0.5253, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7178874015808105, "rewards/margins": 0.7048022150993347, "rewards/rejected": -1.42268967628479, "step": 190 }, { "epoch": 0.2093692750588851, "grad_norm": 32.38750462377469, "learning_rate": 4.821331504159906e-07, "logits/chosen": 5714.408203125, "logits/rejected": 4652.3173828125, "logps/chosen": -347.4368591308594, "logps/rejected": -376.1549377441406, "loss": 0.5437, "rewards/accuracies": 0.75, "rewards/chosen": -0.8864706158638, "rewards/margins": 0.655554473400116, "rewards/rejected": -1.5420252084732056, "step": 200 }, { "epoch": 0.21983773881182936, "grad_norm": 29.7104935551802, "learning_rate": 4.785842691097342e-07, "logits/chosen": 5336.28564453125, "logits/rejected": 4851.28271484375, "logps/chosen": -352.58251953125, "logps/rejected": -445.9090270996094, "loss": 0.5426, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.1549742221832275, "rewards/margins": 0.7335622310638428, "rewards/rejected": -1.8885362148284912, "step": 210 }, { "epoch": 0.23030620256477363, "grad_norm": 37.44571804593422, "learning_rate": 4.7472967660421603e-07, "logits/chosen": 6064.4755859375, "logits/rejected": 5107.64453125, "logps/chosen": -390.0788269042969, "logps/rejected": -448.6966247558594, "loss": 0.5285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0608923435211182, "rewards/margins": 0.7451533079147339, "rewards/rejected": -1.8060455322265625, "step": 220 }, { "epoch": 0.24077466631771788, "grad_norm": 28.35309617040778, "learning_rate": 4.705745280752585e-07, "logits/chosen": 6296.2744140625, "logits/rejected": 5031.78515625, "logps/chosen": -371.34417724609375, "logps/rejected": -429.86907958984375, "loss": 0.5225, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.637545108795166, "rewards/margins": 0.8228956460952759, "rewards/rejected": -1.4604408740997314, "step": 230 }, { "epoch": 0.2512431300706621, "grad_norm": 40.6298906852928, "learning_rate": 4.6612438066572555e-07, "logits/chosen": 6102.9443359375, "logits/rejected": 5360.6923828125, "logps/chosen": -387.57659912109375, "logps/rejected": -470.2581481933594, "loss": 0.5337, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8906810879707336, "rewards/margins": 0.9280546307563782, "rewards/rejected": -1.8187354803085327, "step": 240 }, { "epoch": 0.26171159382360637, "grad_norm": 29.750912562989477, "learning_rate": 4.6138518605333664e-07, "logits/chosen": 5749.36328125, "logits/rejected": 5570.09228515625, "logps/chosen": -421.972412109375, "logps/rejected": -455.70404052734375, "loss": 0.542, "rewards/accuracies": 0.71875, "rewards/chosen": -1.4695032835006714, "rewards/margins": 0.5432707071304321, "rewards/rejected": -2.0127739906311035, "step": 250 }, { "epoch": 0.2721800575765506, "grad_norm": 29.444523548889077, "learning_rate": 4.5636328249082514e-07, "logits/chosen": 6460.7841796875, "logits/rejected": 5733.99609375, "logps/chosen": -367.92193603515625, "logps/rejected": -425.7235412597656, "loss": 0.5446, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9084042310714722, "rewards/margins": 0.5484649538993835, "rewards/rejected": -1.4568692445755005, "step": 260 }, { "epoch": 0.2826485213294949, "grad_norm": 37.100987100214596, "learning_rate": 4.510653863290871e-07, "logits/chosen": 6088.40576171875, "logits/rejected": 4790.62353515625, "logps/chosen": -403.88348388671875, "logps/rejected": -488.5504455566406, "loss": 0.5379, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2523295879364014, "rewards/margins": 0.9070437550544739, "rewards/rejected": -2.1593735218048096, "step": 270 }, { "epoch": 0.29311698508243916, "grad_norm": 39.31056299439328, "learning_rate": 4.4549858303465737e-07, "logits/chosen": 5772.44970703125, "logits/rejected": 5060.7470703125, "logps/chosen": -381.917236328125, "logps/rejected": -456.645263671875, "loss": 0.5021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1831345558166504, "rewards/margins": 0.7890382409095764, "rewards/rejected": -1.9721729755401611, "step": 280 }, { "epoch": 0.3035854488353834, "grad_norm": 23.93190499655242, "learning_rate": 4.396703177135261e-07, "logits/chosen": 5911.7919921875, "logits/rejected": 4911.6435546875, "logps/chosen": -352.9964294433594, "logps/rejected": -405.97808837890625, "loss": 0.5424, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8892698287963867, "rewards/margins": 0.673610508441925, "rewards/rejected": -1.562880277633667, "step": 290 }, { "epoch": 0.31405391258832765, "grad_norm": 40.1981486142399, "learning_rate": 4.335883851539693e-07, "logits/chosen": 5907.3408203125, "logits/rejected": 5368.95263671875, "logps/chosen": -374.5801086425781, "logps/rejected": -431.5013122558594, "loss": 0.4832, "rewards/accuracies": 0.6875, "rewards/chosen": -1.047106146812439, "rewards/margins": 0.8196779489517212, "rewards/rejected": -1.8667840957641602, "step": 300 }, { "epoch": 0.3245223763412719, "grad_norm": 40.62136054464967, "learning_rate": 4.272609194017105e-07, "logits/chosen": 5774.359375, "logits/rejected": 5246.037109375, "logps/chosen": -443.6797790527344, "logps/rejected": -515.5135498046875, "loss": 0.5188, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.504050612449646, "rewards/margins": 0.9943771362304688, "rewards/rejected": -2.498427629470825, "step": 310 }, { "epoch": 0.33499084009421615, "grad_norm": 30.769556635750714, "learning_rate": 4.2069638288135547e-07, "logits/chosen": 5359.9453125, "logits/rejected": 4785.0048828125, "logps/chosen": -391.6275329589844, "logps/rejected": -460.42816162109375, "loss": 0.5048, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.365079641342163, "rewards/margins": 0.7470657229423523, "rewards/rejected": -2.11214542388916, "step": 320 }, { "epoch": 0.34545930384716045, "grad_norm": 40.1478502964153, "learning_rate": 4.139035550786494e-07, "logits/chosen": 6065.78759765625, "logits/rejected": 5532.21044921875, "logps/chosen": -453.89739990234375, "logps/rejected": -554.7542114257812, "loss": 0.5099, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.880091905593872, "rewards/margins": 0.8712676763534546, "rewards/rejected": -2.751359701156616, "step": 330 }, { "epoch": 0.3559277676001047, "grad_norm": 32.658076801350624, "learning_rate": 4.0689152079869306e-07, "logits/chosen": 5996.52490234375, "logits/rejected": 5058.8349609375, "logps/chosen": -528.025634765625, "logps/rejected": -587.1847534179688, "loss": 0.5282, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.278002977371216, "rewards/margins": 0.9359167218208313, "rewards/rejected": -3.2139194011688232, "step": 340 }, { "epoch": 0.36639623135304894, "grad_norm": 28.66276457508689, "learning_rate": 3.99669658015821e-07, "logits/chosen": 5721.43994140625, "logits/rejected": 4679.72802734375, "logps/chosen": -396.7667236328125, "logps/rejected": -416.16864013671875, "loss": 0.5438, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1611254215240479, "rewards/margins": 0.7621843218803406, "rewards/rejected": -1.9233099222183228, "step": 350 }, { "epoch": 0.3768646951059932, "grad_norm": 30.032506888166502, "learning_rate": 3.92247625331392e-07, "logits/chosen": 6061.701171875, "logits/rejected": 5040.14306640625, "logps/chosen": -381.09661865234375, "logps/rejected": -438.5071716308594, "loss": 0.5247, "rewards/accuracies": 0.75, "rewards/chosen": -1.0270540714263916, "rewards/margins": 0.7207306623458862, "rewards/rejected": -1.7477848529815674, "step": 360 }, { "epoch": 0.38733315885893743, "grad_norm": 39.84919745399661, "learning_rate": 3.846353490562664e-07, "logits/chosen": 6276.72216796875, "logits/rejected": 5093.4501953125, "logps/chosen": -423.4830627441406, "logps/rejected": -462.71502685546875, "loss": 0.5091, "rewards/accuracies": 0.75, "rewards/chosen": -1.3078060150146484, "rewards/margins": 0.8824012875556946, "rewards/rejected": -2.1902072429656982, "step": 370 }, { "epoch": 0.39780162261188173, "grad_norm": 50.23073848323155, "learning_rate": 3.768430099352445e-07, "logits/chosen": 5165.4736328125, "logits/rejected": 4407.61083984375, "logps/chosen": -427.8279724121094, "logps/rejected": -459.33758544921875, "loss": 0.5613, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.7211039066314697, "rewards/margins": 0.6335922479629517, "rewards/rejected": -2.354696035385132, "step": 380 }, { "epoch": 0.408270086364826, "grad_norm": 44.81398980022605, "learning_rate": 3.6888102953122304e-07, "logits/chosen": 5707.7646484375, "logits/rejected": 5237.1826171875, "logps/chosen": -471.45172119140625, "logps/rejected": -493.2674865722656, "loss": 0.5258, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5474374294281006, "rewards/margins": 0.7440322041511536, "rewards/rejected": -2.2914695739746094, "step": 390 }, { "epoch": 0.4187385501177702, "grad_norm": 37.98320468674642, "learning_rate": 3.607600562872785e-07, "logits/chosen": 5730.2880859375, "logits/rejected": 5193.93798828125, "logps/chosen": -401.8053283691406, "logps/rejected": -471.9474182128906, "loss": 0.5158, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.2514318227767944, "rewards/margins": 0.7861765623092651, "rewards/rejected": -2.0376086235046387, "step": 400 }, { "epoch": 0.42920701387071447, "grad_norm": 33.81310309482597, "learning_rate": 3.5249095128531856e-07, "logits/chosen": 6085.8251953125, "logits/rejected": 5373.3017578125, "logps/chosen": -439.89093017578125, "logps/rejected": -499.835205078125, "loss": 0.4934, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.520615816116333, "rewards/margins": 0.7662434577941895, "rewards/rejected": -2.2868590354919434, "step": 410 }, { "epoch": 0.4396754776236587, "grad_norm": 30.632303720188528, "learning_rate": 3.4408477372034736e-07, "logits/chosen": 6322.82177734375, "logits/rejected": 5430.2177734375, "logps/chosen": -468.886962890625, "logps/rejected": -515.0479736328125, "loss": 0.5064, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.6579067707061768, "rewards/margins": 0.7777348756790161, "rewards/rejected": -2.4356415271759033, "step": 420 }, { "epoch": 0.45014394137660296, "grad_norm": 30.251752237862295, "learning_rate": 3.3555276610977276e-07, "logits/chosen": 6113.8271484375, "logits/rejected": 5092.26513671875, "logps/chosen": -460.91326904296875, "logps/rejected": -556.900146484375, "loss": 0.4994, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -1.7264970541000366, "rewards/margins": 1.0438661575317383, "rewards/rejected": -2.7703633308410645, "step": 430 }, { "epoch": 0.46061240512954726, "grad_norm": 37.908875986059144, "learning_rate": 3.269063392575352e-07, "logits/chosen": 6295.7158203125, "logits/rejected": 5158.9677734375, "logps/chosen": -464.58453369140625, "logps/rejected": -550.1458129882812, "loss": 0.4817, "rewards/accuracies": 0.78125, "rewards/chosen": -1.7317163944244385, "rewards/margins": 1.185530185699463, "rewards/rejected": -2.9172468185424805, "step": 440 }, { "epoch": 0.4710808688824915, "grad_norm": 41.312379838286745, "learning_rate": 3.1815705699316964e-07, "logits/chosen": 5880.7548828125, "logits/rejected": 4555.7763671875, "logps/chosen": -481.61669921875, "logps/rejected": -538.3797607421875, "loss": 0.5089, "rewards/accuracies": 0.71875, "rewards/chosen": -2.060168504714966, "rewards/margins": 1.0646240711212158, "rewards/rejected": -3.1247925758361816, "step": 450 }, { "epoch": 0.48154933263543576, "grad_norm": 34.66983711949397, "learning_rate": 3.0931662070620794e-07, "logits/chosen": 6316.578125, "logits/rejected": 5165.2431640625, "logps/chosen": -506.47796630859375, "logps/rejected": -567.468505859375, "loss": 0.487, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9843339920043945, "rewards/margins": 1.0964834690093994, "rewards/rejected": -3.080817461013794, "step": 460 }, { "epoch": 0.49201779638838, "grad_norm": 29.279122156736815, "learning_rate": 3.003968536966078e-07, "logits/chosen": 6218.03515625, "logits/rejected": 5217.77587890625, "logps/chosen": -461.3318786621094, "logps/rejected": -556.4960327148438, "loss": 0.4688, "rewards/accuracies": 0.71875, "rewards/chosen": -1.8045008182525635, "rewards/margins": 1.021553874015808, "rewards/rejected": -2.826054573059082, "step": 470 }, { "epoch": 0.5024862601413242, "grad_norm": 30.76156674057149, "learning_rate": 2.9140968536213693e-07, "logits/chosen": 6263.6962890625, "logits/rejected": 5435.2548828125, "logps/chosen": -494.4794921875, "logps/rejected": -567.3831176757812, "loss": 0.475, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.9724314212799072, "rewards/margins": 0.9922296404838562, "rewards/rejected": -2.964661121368408, "step": 480 }, { "epoch": 0.5129547238942685, "grad_norm": 33.456088567553, "learning_rate": 2.823671352438608e-07, "logits/chosen": 5696.94580078125, "logits/rejected": 4792.57470703125, "logps/chosen": -445.6405334472656, "logps/rejected": -528.25927734375, "loss": 0.4936, "rewards/accuracies": 0.75, "rewards/chosen": -1.7750091552734375, "rewards/margins": 0.9487094879150391, "rewards/rejected": -2.7237186431884766, "step": 490 }, { "epoch": 0.5234231876472127, "grad_norm": 38.50479369220262, "learning_rate": 2.73281296951072e-07, "logits/chosen": 5834.8134765625, "logits/rejected": 4785.27880859375, "logps/chosen": -402.5334777832031, "logps/rejected": -497.637939453125, "loss": 0.52, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3989527225494385, "rewards/margins": 1.02751886844635, "rewards/rejected": -2.42647123336792, "step": 500 }, { "epoch": 0.533891651400157, "grad_norm": 33.63811991621766, "learning_rate": 2.641643219871597e-07, "logits/chosen": 6426.5322265625, "logits/rejected": 5097.47265625, "logps/chosen": -459.89752197265625, "logps/rejected": -567.6480712890625, "loss": 0.4775, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5839624404907227, "rewards/margins": 1.2366297245025635, "rewards/rejected": -2.820592164993286, "step": 510 }, { "epoch": 0.5443601151531012, "grad_norm": 32.436951663981176, "learning_rate": 2.550284034980507e-07, "logits/chosen": 5434.05322265625, "logits/rejected": 4500.779296875, "logps/chosen": -368.56280517578125, "logps/rejected": -489.06268310546875, "loss": 0.4912, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.2078874111175537, "rewards/margins": 1.2767075300216675, "rewards/rejected": -2.4845948219299316, "step": 520 }, { "epoch": 0.5548285789060455, "grad_norm": 34.008081752081694, "learning_rate": 2.4588575996495794e-07, "logits/chosen": 5856.4404296875, "logits/rejected": 4918.63525390625, "logps/chosen": -404.7958984375, "logps/rejected": -476.4895935058594, "loss": 0.4838, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.3817389011383057, "rewards/margins": 0.9118432998657227, "rewards/rejected": -2.2935822010040283, "step": 530 }, { "epoch": 0.5652970426589898, "grad_norm": 35.102969742190474, "learning_rate": 2.367486188632446e-07, "logits/chosen": 5812.46142578125, "logits/rejected": 5240.39794921875, "logps/chosen": -456.4381408691406, "logps/rejected": -543.23681640625, "loss": 0.4926, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.8322960138320923, "rewards/margins": 0.8835972547531128, "rewards/rejected": -2.715893030166626, "step": 540 }, { "epoch": 0.575765506411934, "grad_norm": 37.412143947895096, "learning_rate": 2.276292003092593e-07, "logits/chosen": 6094.001953125, "logits/rejected": 4843.4091796875, "logps/chosen": -519.9254760742188, "logps/rejected": -609.431640625, "loss": 0.4546, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.298638343811035, "rewards/margins": 1.1952354907989502, "rewards/rejected": -3.4938740730285645, "step": 550 }, { "epoch": 0.5862339701648783, "grad_norm": 37.77810133971839, "learning_rate": 2.185397007170141e-07, "logits/chosen": 6479.396484375, "logits/rejected": 5155.97509765625, "logps/chosen": -541.1888427734375, "logps/rejected": -611.9423217773438, "loss": 0.4864, "rewards/accuracies": 0.78125, "rewards/chosen": -2.4345860481262207, "rewards/margins": 1.149322271347046, "rewards/rejected": -3.5839085578918457, "step": 560 }, { "epoch": 0.5967024339178225, "grad_norm": 42.433218469696, "learning_rate": 2.094922764865619e-07, "logits/chosen": 5488.037109375, "logits/rejected": 4728.3662109375, "logps/chosen": -587.1964111328125, "logps/rejected": -684.1151123046875, "loss": 0.5109, "rewards/accuracies": 0.75, "rewards/chosen": -3.17779278755188, "rewards/margins": 1.1383225917816162, "rewards/rejected": -4.316114902496338, "step": 570 }, { "epoch": 0.6071708976707668, "grad_norm": 34.38476539071494, "learning_rate": 2.0049902774588797e-07, "logits/chosen": 5764.37060546875, "logits/rejected": 4585.7060546875, "logps/chosen": -524.8135986328125, "logps/rejected": -587.9937744140625, "loss": 0.4824, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.506309747695923, "rewards/margins": 1.0128228664398193, "rewards/rejected": -3.519132614135742, "step": 580 }, { "epoch": 0.6176393614237111, "grad_norm": 33.27923069327013, "learning_rate": 1.9157198216806238e-07, "logits/chosen": 5711.4716796875, "logits/rejected": 4474.36376953125, "logps/chosen": -456.5804138183594, "logps/rejected": -514.6885986328125, "loss": 0.5096, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.9759197235107422, "rewards/margins": 0.8355296850204468, "rewards/rejected": -2.8114495277404785, "step": 590 }, { "epoch": 0.6281078251766553, "grad_norm": 38.31201330631414, "learning_rate": 1.8272307888529274e-07, "logits/chosen": 5056.95068359375, "logits/rejected": 4165.72705078125, "logps/chosen": -422.2060546875, "logps/rejected": -489.57684326171875, "loss": 0.4864, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7633835077285767, "rewards/margins": 0.8871301412582397, "rewards/rejected": -2.6505138874053955, "step": 600 }, { "epoch": 0.6385762889295996, "grad_norm": 41.114159871217296, "learning_rate": 1.7396415252139288e-07, "logits/chosen": 5579.69677734375, "logits/rejected": 4436.1650390625, "logps/chosen": -470.24169921875, "logps/rejected": -561.1392822265625, "loss": 0.4597, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.002847671508789, "rewards/margins": 1.059482216835022, "rewards/rejected": -3.0623297691345215, "step": 610 }, { "epoch": 0.6490447526825438, "grad_norm": 36.082277385319934, "learning_rate": 1.6530691736402316e-07, "logits/chosen": 5616.26123046875, "logits/rejected": 5027.68408203125, "logps/chosen": -455.79058837890625, "logps/rejected": -596.6024169921875, "loss": 0.4451, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0754637718200684, "rewards/margins": 1.251784086227417, "rewards/rejected": -3.3272476196289062, "step": 620 }, { "epoch": 0.6595132164354881, "grad_norm": 56.189305110114034, "learning_rate": 1.5676295169786864e-07, "logits/chosen": 6276.7451171875, "logits/rejected": 4834.232421875, "logps/chosen": -491.4949645996094, "logps/rejected": -613.6256103515625, "loss": 0.4653, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.1036386489868164, "rewards/margins": 1.3933756351470947, "rewards/rejected": -3.497014284133911, "step": 630 }, { "epoch": 0.6699816801884323, "grad_norm": 45.5775233220612, "learning_rate": 1.483436823197092e-07, "logits/chosen": 5295.2880859375, "logits/rejected": 5066.84814453125, "logps/chosen": -478.0670471191406, "logps/rejected": -607.2757568359375, "loss": 0.4756, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.2585527896881104, "rewards/margins": 1.359957218170166, "rewards/rejected": -3.6185097694396973, "step": 640 }, { "epoch": 0.6804501439413766, "grad_norm": 36.958312318449906, "learning_rate": 1.4006036925609243e-07, "logits/chosen": 6228.41259765625, "logits/rejected": 5218.7880859375, "logps/chosen": -527.7664184570312, "logps/rejected": -611.7448120117188, "loss": 0.501, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2601935863494873, "rewards/margins": 1.0959742069244385, "rewards/rejected": -3.3561675548553467, "step": 650 }, { "epoch": 0.6909186076943209, "grad_norm": 38.7446925014424, "learning_rate": 1.319240907040458e-07, "logits/chosen": 5945.08837890625, "logits/rejected": 5132.90283203125, "logps/chosen": -487.3126525878906, "logps/rejected": -570.6744995117188, "loss": 0.4823, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0360536575317383, "rewards/margins": 1.1127293109893799, "rewards/rejected": -3.1487832069396973, "step": 660 }, { "epoch": 0.7013870714472651, "grad_norm": 33.48514110858052, "learning_rate": 1.239457282149695e-07, "logits/chosen": 6214.15380859375, "logits/rejected": 5251.86962890625, "logps/chosen": -535.7365112304688, "logps/rejected": -616.4468383789062, "loss": 0.5021, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.427590847015381, "rewards/margins": 1.0825542211532593, "rewards/rejected": -3.5101447105407715, "step": 670 }, { "epoch": 0.7118555352002094, "grad_norm": 43.79390028546334, "learning_rate": 1.1613595214152711e-07, "logits/chosen": 5851.0458984375, "logits/rejected": 4590.28466796875, "logps/chosen": -617.6078491210938, "logps/rejected": -675.8099365234375, "loss": 0.4637, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -3.118581533432007, "rewards/margins": 0.90425044298172, "rewards/rejected": -4.022831916809082, "step": 680 }, { "epoch": 0.7223239989531536, "grad_norm": 40.2806837745097, "learning_rate": 1.0850520736699362e-07, "logits/chosen": 6110.7978515625, "logits/rejected": 4946.84765625, "logps/chosen": -634.0723876953125, "logps/rejected": -690.5118408203125, "loss": 0.4926, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -3.2604293823242188, "rewards/margins": 1.0428286790847778, "rewards/rejected": -4.303257942199707, "step": 690 }, { "epoch": 0.7327924627060979, "grad_norm": 38.62864426499596, "learning_rate": 1.0106369933615042e-07, "logits/chosen": 5137.73876953125, "logits/rejected": 4863.51220703125, "logps/chosen": -561.9544677734375, "logps/rejected": -646.2843017578125, "loss": 0.5206, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -3.005429744720459, "rewards/margins": 0.919938862323761, "rewards/rejected": -3.925368547439575, "step": 700 }, { "epoch": 0.7432609264590422, "grad_norm": 41.712818545930915, "learning_rate": 9.382138040640714e-08, "logits/chosen": 5658.40087890625, "logits/rejected": 4746.7021484375, "logps/chosen": -526.2380981445312, "logps/rejected": -632.7157592773438, "loss": 0.475, "rewards/accuracies": 0.8125, "rewards/chosen": -2.600060224533081, "rewards/margins": 1.2630447149276733, "rewards/rejected": -3.863105058670044, "step": 710 }, { "epoch": 0.7537293902119864, "grad_norm": 33.829252329277686, "learning_rate": 8.678793653740632e-08, "logits/chosen": 6264.3076171875, "logits/rejected": 5105.57861328125, "logps/chosen": -549.3607788085938, "logps/rejected": -613.100830078125, "loss": 0.4591, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.6160788536071777, "rewards/margins": 0.9087293744087219, "rewards/rejected": -3.524808168411255, "step": 720 }, { "epoch": 0.7641978539649307, "grad_norm": 40.80293648256736, "learning_rate": 7.997277433690983e-08, "logits/chosen": 5647.3720703125, "logits/rejected": 4952.6103515625, "logps/chosen": -586.0548706054688, "logps/rejected": -654.61767578125, "loss": 0.5015, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.7760472297668457, "rewards/margins": 1.1545898914337158, "rewards/rejected": -3.9306368827819824, "step": 730 }, { "epoch": 0.7746663177178749, "grad_norm": 34.84948864651161, "learning_rate": 7.338500848029602e-08, "logits/chosen": 5887.12646484375, "logits/rejected": 5060.0205078125, "logps/chosen": -557.7456665039062, "logps/rejected": -653.4053955078125, "loss": 0.4667, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.71856689453125, "rewards/margins": 1.1988985538482666, "rewards/rejected": -3.9174652099609375, "step": 740 }, { "epoch": 0.7851347814708192, "grad_norm": 35.46968854007604, "learning_rate": 6.70334495204884e-08, "logits/chosen": 5061.13623046875, "logits/rejected": 5132.79931640625, "logps/chosen": -520.9765014648438, "logps/rejected": -627.1702270507812, "loss": 0.4658, "rewards/accuracies": 0.6875, "rewards/chosen": -2.71860933303833, "rewards/margins": 0.979376494884491, "rewards/rejected": -3.697985887527466, "step": 750 }, { "epoch": 0.7956032452237635, "grad_norm": 48.34021796523379, "learning_rate": 6.092659210462231e-08, "logits/chosen": 5833.1083984375, "logits/rejected": 5685.17529296875, "logps/chosen": -551.7678833007812, "logps/rejected": -651.6838989257812, "loss": 0.4842, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.6594486236572266, "rewards/margins": 0.9747648239135742, "rewards/rejected": -3.63421368598938, "step": 760 }, { "epoch": 0.8060717089767077, "grad_norm": 47.3104118514141, "learning_rate": 5.507260361320737e-08, "logits/chosen": 5969.6669921875, "logits/rejected": 5309.11328125, "logps/chosen": -526.6469116210938, "logps/rejected": -625.7551879882812, "loss": 0.4887, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.460120439529419, "rewards/margins": 1.069040060043335, "rewards/rejected": -3.529160261154175, "step": 770 }, { "epoch": 0.816540172729652, "grad_norm": 45.69100735775922, "learning_rate": 4.947931323697982e-08, "logits/chosen": 5785.0791015625, "logits/rejected": 4540.7138671875, "logps/chosen": -506.1707458496094, "logps/rejected": -593.4022827148438, "loss": 0.4742, "rewards/accuracies": 0.78125, "rewards/chosen": -2.2249889373779297, "rewards/margins": 1.1954861879348755, "rewards/rejected": -3.4204750061035156, "step": 780 }, { "epoch": 0.8270086364825961, "grad_norm": 39.287410159706006, "learning_rate": 4.415420150605398e-08, "logits/chosen": 5633.6611328125, "logits/rejected": 4359.56884765625, "logps/chosen": -483.2314453125, "logps/rejected": -545.5601806640625, "loss": 0.4734, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.17476224899292, "rewards/margins": 1.0907633304595947, "rewards/rejected": -3.2655253410339355, "step": 790 }, { "epoch": 0.8374771002355405, "grad_norm": 49.05878566646651, "learning_rate": 3.9104390285376374e-08, "logits/chosen": 5795.65966796875, "logits/rejected": 5147.58349609375, "logps/chosen": -499.25677490234375, "logps/rejected": -596.9561767578125, "loss": 0.4915, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.291295051574707, "rewards/margins": 1.0709164142608643, "rewards/rejected": -3.3622117042541504, "step": 800 }, { "epoch": 0.8479455639884846, "grad_norm": 47.0245444267598, "learning_rate": 3.433663324986208e-08, "logits/chosen": 5386.24365234375, "logits/rejected": 4799.34619140625, "logps/chosen": -502.5039978027344, "logps/rejected": -604.8697509765625, "loss": 0.4827, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.4322919845581055, "rewards/margins": 1.050264596939087, "rewards/rejected": -3.4825565814971924, "step": 810 }, { "epoch": 0.8584140277414289, "grad_norm": 34.276844653382305, "learning_rate": 2.9857306851953897e-08, "logits/chosen": 6096.72265625, "logits/rejected": 5434.0751953125, "logps/chosen": -497.5298767089844, "logps/rejected": -586.8197631835938, "loss": 0.5033, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.3181862831115723, "rewards/margins": 1.0235309600830078, "rewards/rejected": -3.34171724319458, "step": 820 }, { "epoch": 0.8688824914943732, "grad_norm": 27.572151879129066, "learning_rate": 2.567240179368185e-08, "logits/chosen": 6363.4453125, "logits/rejected": 5032.39892578125, "logps/chosen": -525.058349609375, "logps/rejected": -579.8884887695312, "loss": 0.4687, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.241785764694214, "rewards/margins": 1.0823127031326294, "rewards/rejected": -3.3240981101989746, "step": 830 }, { "epoch": 0.8793509552473174, "grad_norm": 32.89947363407846, "learning_rate": 2.1787515014630357e-08, "logits/chosen": 5497.65478515625, "logits/rejected": 5047.75927734375, "logps/chosen": -474.67144775390625, "logps/rejected": -564.0628662109375, "loss": 0.5059, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1919493675231934, "rewards/margins": 0.9676748514175415, "rewards/rejected": -3.1596243381500244, "step": 840 }, { "epoch": 0.8898194190002617, "grad_norm": 39.62997754950574, "learning_rate": 1.820784220652766e-08, "logits/chosen": 6004.0341796875, "logits/rejected": 5342.8115234375, "logps/chosen": -550.6289672851562, "logps/rejected": -619.5693969726562, "loss": 0.4571, "rewards/accuracies": 0.78125, "rewards/chosen": -2.4239485263824463, "rewards/margins": 1.1405421495437622, "rewards/rejected": -3.564490556716919, "step": 850 }, { "epoch": 0.9002878827532059, "grad_norm": 30.37265957733073, "learning_rate": 1.4938170864468636e-08, "logits/chosen": 5734.85009765625, "logits/rejected": 4694.92822265625, "logps/chosen": -500.99285888671875, "logps/rejected": -600.2783813476562, "loss": 0.4557, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.366197347640991, "rewards/margins": 1.1965372562408447, "rewards/rejected": -3.562734603881836, "step": 860 }, { "epoch": 0.9107563465061502, "grad_norm": 42.8728344949466, "learning_rate": 1.1982873884064465e-08, "logits/chosen": 4703.9013671875, "logits/rejected": 4299.08837890625, "logps/chosen": -462.556640625, "logps/rejected": -579.5667724609375, "loss": 0.4866, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3841712474823, "rewards/margins": 1.0620203018188477, "rewards/rejected": -3.4461913108825684, "step": 870 }, { "epoch": 0.9212248102590945, "grad_norm": 30.16704605166611, "learning_rate": 9.345903713082304e-09, "logits/chosen": 5846.646484375, "logits/rejected": 5363.2783203125, "logps/chosen": -519.5308227539062, "logps/rejected": -640.5654296875, "loss": 0.4795, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.3675999641418457, "rewards/margins": 1.2334234714508057, "rewards/rejected": -3.6010231971740723, "step": 880 }, { "epoch": 0.9316932740120387, "grad_norm": 42.051094279847355, "learning_rate": 7.030787065396865e-09, "logits/chosen": 6008.31201171875, "logits/rejected": 5258.09423828125, "logps/chosen": -548.2977905273438, "logps/rejected": -650.9564208984375, "loss": 0.4731, "rewards/accuracies": 0.75, "rewards/chosen": -2.4404239654541016, "rewards/margins": 1.2035192251205444, "rewards/rejected": -3.6439430713653564, "step": 890 }, { "epoch": 0.942161737764983, "grad_norm": 38.027332059671004, "learning_rate": 5.04062020432286e-09, "logits/chosen": 5349.552734375, "logits/rejected": 5041.7802734375, "logps/chosen": -496.6697692871094, "logps/rejected": -612.8040161132812, "loss": 0.4677, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.3748703002929688, "rewards/margins": 1.0867860317230225, "rewards/rejected": -3.4616565704345703, "step": 900 }, { "epoch": 0.9526302015179272, "grad_norm": 45.729177362308796, "learning_rate": 3.3780648016376866e-09, "logits/chosen": 5641.29150390625, "logits/rejected": 4916.0888671875, "logps/chosen": -519.3919677734375, "logps/rejected": -579.9984130859375, "loss": 0.4961, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.520193099975586, "rewards/margins": 0.9546852111816406, "rewards/rejected": -3.4748783111572266, "step": 910 }, { "epoch": 0.9630986652708715, "grad_norm": 36.35753578021077, "learning_rate": 2.0453443778310766e-09, "logits/chosen": 5727.46484375, "logits/rejected": 4886.53955078125, "logps/chosen": -521.6600341796875, "logps/rejected": -615.242431640625, "loss": 0.4727, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.396231174468994, "rewards/margins": 1.1399519443511963, "rewards/rejected": -3.5361828804016113, "step": 920 }, { "epoch": 0.9735671290238157, "grad_norm": 35.74381942321771, "learning_rate": 1.0442413283435758e-09, "logits/chosen": 6282.48046875, "logits/rejected": 5133.5615234375, "logps/chosen": -544.6215209960938, "logps/rejected": -644.2821044921875, "loss": 0.4625, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -2.2689859867095947, "rewards/margins": 1.2234214544296265, "rewards/rejected": -3.4924075603485107, "step": 930 }, { "epoch": 0.98403559277676, "grad_norm": 49.20659744895587, "learning_rate": 3.760945397705828e-10, "logits/chosen": 6267.5966796875, "logits/rejected": 5505.8017578125, "logps/chosen": -530.5960693359375, "logps/rejected": -629.7965087890625, "loss": 0.4959, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.401353597640991, "rewards/margins": 0.9310855865478516, "rewards/rejected": -3.3324389457702637, "step": 940 }, { "epoch": 0.9945040565297043, "grad_norm": 34.18544249864759, "learning_rate": 4.17975992204056e-11, "logits/chosen": 6401.61474609375, "logits/rejected": 5400.9697265625, "logps/chosen": -558.7767944335938, "logps/rejected": -602.9310302734375, "loss": 0.4907, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.3211910724639893, "rewards/margins": 1.0175538063049316, "rewards/rejected": -3.3387451171875, "step": 950 }, { "epoch": 0.9997382884061764, "step": 955, "total_flos": 0.0, "train_loss": 0.5214411970208452, "train_runtime": 13074.5389, "train_samples_per_second": 4.676, "train_steps_per_second": 0.073 } ], "logging_steps": 10, "max_steps": 955, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }