{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3089005235602094, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "logits/chosen": -2.729219675064087, "logits/rejected": -2.713034152984619, "logps/chosen": -183.00042724609375, "logps/rejected": -183.33316040039062, "loss": 0.6973, "rewards/accuracies": 0.5625, "rewards/chosen": -0.004850482568144798, "rewards/margins": -0.007815884426236153, "rewards/rejected": 0.0029654023237526417, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -2.480727195739746, "logits/rejected": -2.563934564590454, "logps/chosen": -159.55963134765625, "logps/rejected": -157.36929321289062, "loss": 0.6902, "rewards/accuracies": 0.625, "rewards/chosen": 0.004495501983910799, "rewards/margins": 0.006143546663224697, "rewards/rejected": -0.0016480451449751854, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.5e-06, "logits/chosen": -2.856149911880493, "logits/rejected": -2.8624300956726074, "logps/chosen": -241.56802368164062, "logps/rejected": -251.95797729492188, "loss": 0.6969, "rewards/accuracies": 0.375, "rewards/chosen": -0.01090860366821289, "rewards/margins": -0.007115649990737438, "rewards/rejected": -0.0037929536774754524, "step": 3 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -2.473580837249756, "logits/rejected": -2.6020100116729736, "logps/chosen": -138.55348205566406, "logps/rejected": -167.7603759765625, "loss": 0.6844, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01281428337097168, "rewards/margins": 0.01771531254053116, "rewards/rejected": -0.0049010273069143295, "step": 4 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "logits/chosen": -2.446133613586426, "logits/rejected": -2.5022342205047607, "logps/chosen": -140.56512451171875, "logps/rejected": -178.04331970214844, "loss": 0.6995, "rewards/accuracies": 0.375, "rewards/chosen": -0.013703584671020508, "rewards/margins": -0.012241363525390625, "rewards/rejected": -0.001462221029214561, "step": 5 }, { "epoch": 0.01, "learning_rate": 3e-06, "logits/chosen": -2.5403974056243896, "logits/rejected": -2.650925874710083, "logps/chosen": -162.82369995117188, "logps/rejected": -214.57489013671875, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": 0.003328490536659956, "rewards/margins": 0.004930590279400349, "rewards/rejected": -0.0016021011397242546, "step": 6 }, { "epoch": 0.01, "learning_rate": 3.5000000000000004e-06, "logits/chosen": -2.6547656059265137, "logits/rejected": -2.577648162841797, "logps/chosen": -219.3771514892578, "logps/rejected": -215.02362060546875, "loss": 0.6931, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0027452707290649414, "rewards/margins": 0.0002449510502628982, "rewards/rejected": 0.0025003196205943823, "step": 7 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "logits/chosen": -2.5475914478302, "logits/rejected": -2.586148500442505, "logps/chosen": -214.5223388671875, "logps/rejected": -236.43626403808594, "loss": 0.6928, "rewards/accuracies": 0.5, "rewards/chosen": 0.007447218522429466, "rewards/margins": 0.0010163071565330029, "rewards/rejected": 0.006430912297219038, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.5e-06, "logits/chosen": -2.657726526260376, "logits/rejected": -2.7398831844329834, "logps/chosen": -158.13832092285156, "logps/rejected": -176.91400146484375, "loss": 0.6877, "rewards/accuracies": 0.5, "rewards/chosen": 0.009470987133681774, "rewards/margins": 0.011165929958224297, "rewards/rejected": -0.0016949418932199478, "step": 9 }, { "epoch": 0.01, "learning_rate": 5e-06, "logits/chosen": -2.3761770725250244, "logits/rejected": -2.4064137935638428, "logps/chosen": -176.59835815429688, "logps/rejected": -163.67300415039062, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": 0.002156305592507124, "rewards/margins": 0.004867983516305685, "rewards/rejected": -0.002711677923798561, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.500000000000001e-06, "logits/chosen": -2.6009013652801514, "logits/rejected": -2.645084857940674, "logps/chosen": -197.91195678710938, "logps/rejected": -245.907470703125, "loss": 0.695, "rewards/accuracies": 0.625, "rewards/chosen": -0.001973224338144064, "rewards/margins": -0.0031423806212842464, "rewards/rejected": 0.0011691567488014698, "step": 11 }, { "epoch": 0.02, "learning_rate": 6e-06, "logits/chosen": -2.70407772064209, "logits/rejected": -2.713822364807129, "logps/chosen": -190.91189575195312, "logps/rejected": -190.74317932128906, "loss": 0.6921, "rewards/accuracies": 0.375, "rewards/chosen": 0.0004999642260372639, "rewards/margins": 0.002835321705788374, "rewards/rejected": -0.00233535747975111, "step": 12 }, { "epoch": 0.02, "learning_rate": 6.5000000000000004e-06, "logits/chosen": -2.585596799850464, "logits/rejected": -2.6356894969940186, "logps/chosen": -214.14459228515625, "logps/rejected": -242.9970245361328, "loss": 0.687, "rewards/accuracies": 0.6875, "rewards/chosen": -0.006649780552834272, "rewards/margins": 0.01303944643586874, "rewards/rejected": -0.019689226523041725, "step": 13 }, { "epoch": 0.02, "learning_rate": 7.000000000000001e-06, "logits/chosen": -2.7044496536254883, "logits/rejected": -2.6631391048431396, "logps/chosen": -183.95582580566406, "logps/rejected": -169.8933868408203, "loss": 0.6967, "rewards/accuracies": 0.4375, "rewards/chosen": -0.01296384446322918, "rewards/margins": -0.006747078616172075, "rewards/rejected": -0.0062167649157345295, "step": 14 }, { "epoch": 0.02, "learning_rate": 7.5e-06, "logits/chosen": -2.6691248416900635, "logits/rejected": -2.6515817642211914, "logps/chosen": -161.9134979248047, "logps/rejected": -170.60101318359375, "loss": 0.6866, "rewards/accuracies": 0.75, "rewards/chosen": 0.009864186868071556, "rewards/margins": 0.01341402530670166, "rewards/rejected": -0.0035498379729688168, "step": 15 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-06, "logits/chosen": -2.6293349266052246, "logits/rejected": -2.652617931365967, "logps/chosen": -161.3071746826172, "logps/rejected": -169.07638549804688, "loss": 0.6808, "rewards/accuracies": 0.75, "rewards/chosen": 0.005685711745172739, "rewards/margins": 0.025187280029058456, "rewards/rejected": -0.019501566886901855, "step": 16 }, { "epoch": 0.02, "learning_rate": 8.500000000000002e-06, "logits/chosen": -2.6463351249694824, "logits/rejected": -2.696180582046509, "logps/chosen": -154.61085510253906, "logps/rejected": -148.3529510498047, "loss": 0.6916, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0029291389510035515, "rewards/margins": 0.0034487005323171616, "rewards/rejected": -0.0005195615813136101, "step": 17 }, { "epoch": 0.02, "learning_rate": 9e-06, "logits/chosen": -2.5302553176879883, "logits/rejected": -2.4991636276245117, "logps/chosen": -152.20655822753906, "logps/rejected": -131.0479278564453, "loss": 0.6954, "rewards/accuracies": 0.5625, "rewards/chosen": -0.009920882061123848, "rewards/margins": -0.004111624322831631, "rewards/rejected": -0.005809259135276079, "step": 18 }, { "epoch": 0.02, "learning_rate": 9.5e-06, "logits/chosen": -2.5337584018707275, "logits/rejected": -2.6624388694763184, "logps/chosen": -160.88140869140625, "logps/rejected": -195.38058471679688, "loss": 0.6829, "rewards/accuracies": 0.75, "rewards/chosen": 1.540081575512886e-05, "rewards/margins": 0.021059704944491386, "rewards/rejected": -0.021044302731752396, "step": 19 }, { "epoch": 0.03, "learning_rate": 1e-05, "logits/chosen": -2.7187581062316895, "logits/rejected": -2.699402093887329, "logps/chosen": -184.01405334472656, "logps/rejected": -200.99124145507812, "loss": 0.6898, "rewards/accuracies": 0.5, "rewards/chosen": -0.007425189018249512, "rewards/margins": 0.007250881753861904, "rewards/rejected": -0.014676070772111416, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.05e-05, "logits/chosen": -2.5241212844848633, "logits/rejected": -2.5988845825195312, "logps/chosen": -181.3677215576172, "logps/rejected": -156.8072509765625, "loss": 0.687, "rewards/accuracies": 0.6875, "rewards/chosen": 0.006232499144971371, "rewards/margins": 0.01287851296365261, "rewards/rejected": -0.006646013353019953, "step": 21 }, { "epoch": 0.03, "learning_rate": 1.1000000000000001e-05, "logits/chosen": -2.531973361968994, "logits/rejected": -2.594179153442383, "logps/chosen": -166.6884307861328, "logps/rejected": -173.5614013671875, "loss": 0.6942, "rewards/accuracies": 0.5, "rewards/chosen": -0.007440138608217239, "rewards/margins": -0.0019538167398422956, "rewards/rejected": -0.0054863216355443, "step": 22 }, { "epoch": 0.03, "learning_rate": 1.1500000000000002e-05, "logits/chosen": -2.4815938472747803, "logits/rejected": -2.4820916652679443, "logps/chosen": -131.22227478027344, "logps/rejected": -131.2086181640625, "loss": 0.6915, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0006843092851340771, "rewards/margins": 0.003669046564027667, "rewards/rejected": -0.004353356547653675, "step": 23 }, { "epoch": 0.03, "learning_rate": 1.2e-05, "logits/chosen": -2.4899590015411377, "logits/rejected": -2.5294911861419678, "logps/chosen": -149.33543395996094, "logps/rejected": -141.8245391845703, "loss": 0.6983, "rewards/accuracies": 0.3125, "rewards/chosen": -0.017502309754490852, "rewards/margins": -0.010094404220581055, "rewards/rejected": -0.0074079036712646484, "step": 24 }, { "epoch": 0.03, "learning_rate": 1.25e-05, "logits/chosen": -2.5714809894561768, "logits/rejected": -2.5547332763671875, "logps/chosen": -167.8873291015625, "logps/rejected": -173.3172149658203, "loss": 0.69, "rewards/accuracies": 0.5, "rewards/chosen": -0.00382807245478034, "rewards/margins": 0.006511807441711426, "rewards/rejected": -0.010339880362153053, "step": 25 }, { "epoch": 0.03, "learning_rate": 1.3000000000000001e-05, "logits/chosen": -2.470759630203247, "logits/rejected": -2.4105560779571533, "logps/chosen": -174.09585571289062, "logps/rejected": -175.88677978515625, "loss": 0.6914, "rewards/accuracies": 0.5, "rewards/chosen": 0.0010800834279507399, "rewards/margins": 0.004079150035977364, "rewards/rejected": -0.0029990668408572674, "step": 26 }, { "epoch": 0.04, "learning_rate": 1.3500000000000001e-05, "logits/chosen": -2.6590945720672607, "logits/rejected": -2.649517059326172, "logps/chosen": -174.6903839111328, "logps/rejected": -165.15533447265625, "loss": 0.69, "rewards/accuracies": 0.6875, "rewards/chosen": -0.012396741658449173, "rewards/margins": 0.006480884738266468, "rewards/rejected": -0.018877625465393066, "step": 27 }, { "epoch": 0.04, "learning_rate": 1.4000000000000001e-05, "logits/chosen": -2.517886161804199, "logits/rejected": -2.6399176120758057, "logps/chosen": -154.28765869140625, "logps/rejected": -181.42474365234375, "loss": 0.6867, "rewards/accuracies": 0.6875, "rewards/chosen": -0.01030280627310276, "rewards/margins": 0.01349327526986599, "rewards/rejected": -0.0237960796803236, "step": 28 }, { "epoch": 0.04, "learning_rate": 1.45e-05, "logits/chosen": -2.5826056003570557, "logits/rejected": -2.5789594650268555, "logps/chosen": -157.37286376953125, "logps/rejected": -154.72024536132812, "loss": 0.6894, "rewards/accuracies": 0.5, "rewards/chosen": -0.016228055581450462, "rewards/margins": 0.007857107557356358, "rewards/rejected": -0.024085164070129395, "step": 29 }, { "epoch": 0.04, "learning_rate": 1.5e-05, "logits/chosen": -2.6575980186462402, "logits/rejected": -2.704176664352417, "logps/chosen": -159.80322265625, "logps/rejected": -165.82786560058594, "loss": 0.6829, "rewards/accuracies": 0.6875, "rewards/chosen": -0.00888748187571764, "rewards/margins": 0.02125699445605278, "rewards/rejected": -0.030144479125738144, "step": 30 }, { "epoch": 0.04, "learning_rate": 1.55e-05, "logits/chosen": -2.650428533554077, "logits/rejected": -2.693262815475464, "logps/chosen": -164.53692626953125, "logps/rejected": -174.57235717773438, "loss": 0.6852, "rewards/accuracies": 0.5625, "rewards/chosen": 0.001265859231352806, "rewards/margins": 0.01664908044040203, "rewards/rejected": -0.01538322027772665, "step": 31 }, { "epoch": 0.04, "learning_rate": 1.6000000000000003e-05, "logits/chosen": -2.6280901432037354, "logits/rejected": -2.6152122020721436, "logps/chosen": -174.9736328125, "logps/rejected": -209.5363311767578, "loss": 0.6859, "rewards/accuracies": 0.6875, "rewards/chosen": -0.013908598572015762, "rewards/margins": 0.016062045469880104, "rewards/rejected": -0.029970645904541016, "step": 32 }, { "epoch": 0.04, "learning_rate": 1.65e-05, "logits/chosen": -2.5862505435943604, "logits/rejected": -2.5280563831329346, "logps/chosen": -160.01731872558594, "logps/rejected": -172.03590393066406, "loss": 0.694, "rewards/accuracies": 0.625, "rewards/chosen": -0.03881430625915527, "rewards/margins": -0.0009270897135138512, "rewards/rejected": -0.03788721561431885, "step": 33 }, { "epoch": 0.04, "learning_rate": 1.7000000000000003e-05, "logits/chosen": -2.522751808166504, "logits/rejected": -2.49782657623291, "logps/chosen": -167.8050537109375, "logps/rejected": -177.90399169921875, "loss": 0.7061, "rewards/accuracies": 0.1875, "rewards/chosen": -0.031221888959407806, "rewards/margins": -0.02547764778137207, "rewards/rejected": -0.005744242575019598, "step": 34 }, { "epoch": 0.05, "learning_rate": 1.75e-05, "logits/chosen": -2.557039260864258, "logits/rejected": -2.4520561695098877, "logps/chosen": -143.8395538330078, "logps/rejected": -140.37631225585938, "loss": 0.6944, "rewards/accuracies": 0.4375, "rewards/chosen": -0.039483096450567245, "rewards/margins": -0.0023098706733435392, "rewards/rejected": -0.03717322647571564, "step": 35 }, { "epoch": 0.05, "learning_rate": 1.8e-05, "logits/chosen": -2.6500961780548096, "logits/rejected": -2.5340402126312256, "logps/chosen": -172.447998046875, "logps/rejected": -198.5001678466797, "loss": 0.7036, "rewards/accuracies": 0.5, "rewards/chosen": -0.035362813621759415, "rewards/margins": -0.020008588209748268, "rewards/rejected": -0.015354226343333721, "step": 36 }, { "epoch": 0.05, "learning_rate": 1.85e-05, "logits/chosen": -2.6206865310668945, "logits/rejected": -2.6976592540740967, "logps/chosen": -167.8064422607422, "logps/rejected": -185.81964111328125, "loss": 0.6987, "rewards/accuracies": 0.4375, "rewards/chosen": -0.03894533962011337, "rewards/margins": -0.009856510907411575, "rewards/rejected": -0.029088832437992096, "step": 37 }, { "epoch": 0.05, "learning_rate": 1.9e-05, "logits/chosen": -2.434297800064087, "logits/rejected": -2.651834487915039, "logps/chosen": -169.50946044921875, "logps/rejected": -257.7209167480469, "loss": 0.6941, "rewards/accuracies": 0.4375, "rewards/chosen": -0.03687644004821777, "rewards/margins": -0.0012842637952417135, "rewards/rejected": -0.035592176020145416, "step": 38 }, { "epoch": 0.05, "learning_rate": 1.9500000000000003e-05, "logits/chosen": -2.4155445098876953, "logits/rejected": -2.487833023071289, "logps/chosen": -180.0078582763672, "logps/rejected": -207.1255340576172, "loss": 0.6978, "rewards/accuracies": 0.375, "rewards/chosen": -0.037531279027462006, "rewards/margins": -0.008796263486146927, "rewards/rejected": -0.028735019266605377, "step": 39 }, { "epoch": 0.05, "learning_rate": 2e-05, "logits/chosen": -2.6476895809173584, "logits/rejected": -2.707326650619507, "logps/chosen": -182.0277862548828, "logps/rejected": -180.2669677734375, "loss": 0.6857, "rewards/accuracies": 0.5, "rewards/chosen": -0.029536105692386627, "rewards/margins": 0.015740489587187767, "rewards/rejected": -0.04527659714221954, "step": 40 }, { "epoch": 0.05, "learning_rate": 2.05e-05, "logits/chosen": -2.691012144088745, "logits/rejected": -2.688505172729492, "logps/chosen": -173.92041015625, "logps/rejected": -208.61599731445312, "loss": 0.6781, "rewards/accuracies": 0.5625, "rewards/chosen": -0.01940765418112278, "rewards/margins": 0.03146040812134743, "rewards/rejected": -0.05086805671453476, "step": 41 }, { "epoch": 0.05, "learning_rate": 2.1e-05, "logits/chosen": -2.619537353515625, "logits/rejected": -2.6051697731018066, "logps/chosen": -193.06666564941406, "logps/rejected": -178.87713623046875, "loss": 0.699, "rewards/accuracies": 0.375, "rewards/chosen": -0.035286471247673035, "rewards/margins": -0.010646676644682884, "rewards/rejected": -0.0246397964656353, "step": 42 }, { "epoch": 0.06, "learning_rate": 2.15e-05, "logits/chosen": -2.611173391342163, "logits/rejected": -2.6004340648651123, "logps/chosen": -188.3818817138672, "logps/rejected": -214.30926513671875, "loss": 0.6674, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0015826929593458772, "rewards/margins": 0.05373835563659668, "rewards/rejected": -0.05215566232800484, "step": 43 }, { "epoch": 0.06, "learning_rate": 2.2000000000000003e-05, "logits/chosen": -2.667587995529175, "logits/rejected": -2.688136339187622, "logps/chosen": -188.89466857910156, "logps/rejected": -207.63580322265625, "loss": 0.6982, "rewards/accuracies": 0.375, "rewards/chosen": -0.04630360007286072, "rewards/margins": -0.008599425666034222, "rewards/rejected": -0.03770418092608452, "step": 44 }, { "epoch": 0.06, "learning_rate": 2.25e-05, "logits/chosen": -2.5165350437164307, "logits/rejected": -2.441213607788086, "logps/chosen": -170.480712890625, "logps/rejected": -159.08981323242188, "loss": 0.6945, "rewards/accuracies": 0.625, "rewards/chosen": -0.03015182353556156, "rewards/margins": -0.0018717292696237564, "rewards/rejected": -0.028280090540647507, "step": 45 }, { "epoch": 0.06, "learning_rate": 2.3000000000000003e-05, "logits/chosen": -2.523725748062134, "logits/rejected": -2.6105730533599854, "logps/chosen": -170.0879364013672, "logps/rejected": -197.1090087890625, "loss": 0.6884, "rewards/accuracies": 0.5625, "rewards/chosen": -0.022913338616490364, "rewards/margins": 0.010608267970383167, "rewards/rejected": -0.033521607518196106, "step": 46 }, { "epoch": 0.06, "learning_rate": 2.35e-05, "logits/chosen": -2.5311026573181152, "logits/rejected": -2.54695463180542, "logps/chosen": -178.0673065185547, "logps/rejected": -182.32875061035156, "loss": 0.6816, "rewards/accuracies": 0.625, "rewards/chosen": -0.008729481138288975, "rewards/margins": 0.02449822425842285, "rewards/rejected": -0.03322770446538925, "step": 47 }, { "epoch": 0.06, "learning_rate": 2.4e-05, "logits/chosen": -2.6589465141296387, "logits/rejected": -2.6210758686065674, "logps/chosen": -182.40536499023438, "logps/rejected": -169.12103271484375, "loss": 0.6799, "rewards/accuracies": 0.6875, "rewards/chosen": -0.003579068696126342, "rewards/margins": 0.02751018851995468, "rewards/rejected": -0.031089257448911667, "step": 48 }, { "epoch": 0.06, "learning_rate": 2.45e-05, "logits/chosen": -2.513227701187134, "logits/rejected": -2.543900489807129, "logps/chosen": -170.51919555664062, "logps/rejected": -168.29690551757812, "loss": 0.7047, "rewards/accuracies": 0.1875, "rewards/chosen": -0.021081138402223587, "rewards/margins": -0.022327663376927376, "rewards/rejected": 0.0012465240433812141, "step": 49 }, { "epoch": 0.07, "learning_rate": 2.5e-05, "logits/chosen": -2.434335470199585, "logits/rejected": -2.4906935691833496, "logps/chosen": -162.16989135742188, "logps/rejected": -212.56082153320312, "loss": 0.6807, "rewards/accuracies": 0.625, "rewards/chosen": -0.027100684121251106, "rewards/margins": 0.02660501003265381, "rewards/rejected": -0.053705692291259766, "step": 50 }, { "epoch": 0.07, "learning_rate": 2.5500000000000003e-05, "logits/chosen": -2.536504030227661, "logits/rejected": -2.6495611667633057, "logps/chosen": -171.29580688476562, "logps/rejected": -191.54605102539062, "loss": 0.6815, "rewards/accuracies": 0.6875, "rewards/chosen": -0.025197075679898262, "rewards/margins": 0.024894431233406067, "rewards/rejected": -0.05009150505065918, "step": 51 }, { "epoch": 0.07, "learning_rate": 2.6000000000000002e-05, "logits/chosen": -2.6077020168304443, "logits/rejected": -2.564440965652466, "logps/chosen": -170.64273071289062, "logps/rejected": -217.3800811767578, "loss": 0.7054, "rewards/accuracies": 0.375, "rewards/chosen": -0.029342200607061386, "rewards/margins": -0.02205488458275795, "rewards/rejected": -0.007287311367690563, "step": 52 }, { "epoch": 0.07, "learning_rate": 2.6500000000000004e-05, "logits/chosen": -2.4113476276397705, "logits/rejected": -2.4167730808258057, "logps/chosen": -158.03866577148438, "logps/rejected": -203.66368103027344, "loss": 0.7, "rewards/accuracies": 0.5625, "rewards/chosen": -0.03733339160680771, "rewards/margins": -0.011522197164595127, "rewards/rejected": -0.025811197236180305, "step": 53 }, { "epoch": 0.07, "learning_rate": 2.7000000000000002e-05, "logits/chosen": -2.5948705673217773, "logits/rejected": -2.584592342376709, "logps/chosen": -163.16751098632812, "logps/rejected": -196.3957061767578, "loss": 0.687, "rewards/accuracies": 0.5625, "rewards/chosen": -0.050568290054798126, "rewards/margins": 0.014712072908878326, "rewards/rejected": -0.06528037041425705, "step": 54 }, { "epoch": 0.07, "learning_rate": 2.7500000000000004e-05, "logits/chosen": -2.524641513824463, "logits/rejected": -2.445185661315918, "logps/chosen": -167.29495239257812, "logps/rejected": -178.15013122558594, "loss": 0.694, "rewards/accuracies": 0.5, "rewards/chosen": -0.03202950954437256, "rewards/margins": 0.00026745768263936043, "rewards/rejected": -0.03229696676135063, "step": 55 }, { "epoch": 0.07, "learning_rate": 2.8000000000000003e-05, "logits/chosen": -2.7647242546081543, "logits/rejected": -2.721259117126465, "logps/chosen": -179.40574645996094, "logps/rejected": -180.4486541748047, "loss": 0.6771, "rewards/accuracies": 0.75, "rewards/chosen": -0.0023113014176487923, "rewards/margins": 0.03354344516992569, "rewards/rejected": -0.035854749381542206, "step": 56 }, { "epoch": 0.07, "learning_rate": 2.8499999999999998e-05, "logits/chosen": -2.676018714904785, "logits/rejected": -2.6648664474487305, "logps/chosen": -197.95919799804688, "logps/rejected": -175.45230102539062, "loss": 0.6764, "rewards/accuracies": 0.6875, "rewards/chosen": -0.00892419833689928, "rewards/margins": 0.036644406616687775, "rewards/rejected": -0.04556860774755478, "step": 57 }, { "epoch": 0.08, "learning_rate": 2.9e-05, "logits/chosen": -2.644045829772949, "logits/rejected": -2.7341084480285645, "logps/chosen": -180.7408905029297, "logps/rejected": -189.23818969726562, "loss": 0.6963, "rewards/accuracies": 0.5, "rewards/chosen": -0.0153807383030653, "rewards/margins": -0.005131007172167301, "rewards/rejected": -0.010249733924865723, "step": 58 }, { "epoch": 0.08, "learning_rate": 2.95e-05, "logits/chosen": -2.6532645225524902, "logits/rejected": -2.552724599838257, "logps/chosen": -178.77191162109375, "logps/rejected": -175.42742919921875, "loss": 0.6841, "rewards/accuracies": 0.625, "rewards/chosen": -0.03405280038714409, "rewards/margins": 0.020096803084015846, "rewards/rejected": -0.054149605333805084, "step": 59 }, { "epoch": 0.08, "learning_rate": 3e-05, "logits/chosen": -2.5994341373443604, "logits/rejected": -2.6342806816101074, "logps/chosen": -176.28402709960938, "logps/rejected": -210.64498901367188, "loss": 0.6975, "rewards/accuracies": 0.5625, "rewards/chosen": -0.025557922199368477, "rewards/margins": -0.004868890158832073, "rewards/rejected": -0.02068903297185898, "step": 60 }, { "epoch": 0.08, "learning_rate": 3.05e-05, "logits/chosen": -2.58650279045105, "logits/rejected": -2.7233827114105225, "logps/chosen": -184.57443237304688, "logps/rejected": -175.98129272460938, "loss": 0.7081, "rewards/accuracies": 0.3125, "rewards/chosen": -0.06466653198003769, "rewards/margins": -0.02823822945356369, "rewards/rejected": -0.0364283062517643, "step": 61 }, { "epoch": 0.08, "learning_rate": 3.1e-05, "logits/chosen": -2.6913092136383057, "logits/rejected": -2.5653786659240723, "logps/chosen": -203.69729614257812, "logps/rejected": -190.5261688232422, "loss": 0.7161, "rewards/accuracies": 0.25, "rewards/chosen": -0.04825315251946449, "rewards/margins": -0.04361088201403618, "rewards/rejected": -0.004642271436750889, "step": 62 }, { "epoch": 0.08, "learning_rate": 3.15e-05, "logits/chosen": -2.666816473007202, "logits/rejected": -2.604631185531616, "logps/chosen": -225.96981811523438, "logps/rejected": -193.25982666015625, "loss": 0.7001, "rewards/accuracies": 0.4375, "rewards/chosen": -0.040044669061899185, "rewards/margins": -0.012522673234343529, "rewards/rejected": -0.027521992102265358, "step": 63 }, { "epoch": 0.08, "learning_rate": 3.2000000000000005e-05, "logits/chosen": -2.6570017337799072, "logits/rejected": -2.633661985397339, "logps/chosen": -188.25892639160156, "logps/rejected": -171.55096435546875, "loss": 0.7173, "rewards/accuracies": 0.3125, "rewards/chosen": -0.08987895399332047, "rewards/margins": -0.04469916597008705, "rewards/rejected": -0.04517979919910431, "step": 64 }, { "epoch": 0.09, "learning_rate": 3.2500000000000004e-05, "logits/chosen": -2.582909107208252, "logits/rejected": -2.587308883666992, "logps/chosen": -168.5953369140625, "logps/rejected": -180.11024475097656, "loss": 0.689, "rewards/accuracies": 0.5, "rewards/chosen": -0.038271259516477585, "rewards/margins": 0.01016156654804945, "rewards/rejected": -0.04843283072113991, "step": 65 }, { "epoch": 0.09, "learning_rate": 3.3e-05, "logits/chosen": -2.7339940071105957, "logits/rejected": -2.6128218173980713, "logps/chosen": -187.80320739746094, "logps/rejected": -176.04495239257812, "loss": 0.6948, "rewards/accuracies": 0.4375, "rewards/chosen": -0.08385930210351944, "rewards/margins": -0.0003412736114114523, "rewards/rejected": -0.08351802825927734, "step": 66 }, { "epoch": 0.09, "learning_rate": 3.35e-05, "logits/chosen": -2.406219482421875, "logits/rejected": -2.435150623321533, "logps/chosen": -150.17405700683594, "logps/rejected": -174.16119384765625, "loss": 0.68, "rewards/accuracies": 0.5625, "rewards/chosen": -0.030688336119055748, "rewards/margins": 0.027779744938015938, "rewards/rejected": -0.058468081057071686, "step": 67 }, { "epoch": 0.09, "learning_rate": 3.4000000000000007e-05, "logits/chosen": -2.7922043800354004, "logits/rejected": -2.8177366256713867, "logps/chosen": -202.07823181152344, "logps/rejected": -233.48065185546875, "loss": 0.688, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10107097774744034, "rewards/margins": 0.014442582614719868, "rewards/rejected": -0.11551356315612793, "step": 68 }, { "epoch": 0.09, "learning_rate": 3.45e-05, "logits/chosen": -2.621925115585327, "logits/rejected": -2.548135280609131, "logps/chosen": -163.22723388671875, "logps/rejected": -183.32742309570312, "loss": 0.6853, "rewards/accuracies": 0.4375, "rewards/chosen": -0.009019946679472923, "rewards/margins": 0.020815372467041016, "rewards/rejected": -0.029835321009159088, "step": 69 }, { "epoch": 0.09, "learning_rate": 3.5e-05, "logits/chosen": -2.7518372535705566, "logits/rejected": -2.7260899543762207, "logps/chosen": -223.34564208984375, "logps/rejected": -245.5427703857422, "loss": 0.7156, "rewards/accuracies": 0.375, "rewards/chosen": -0.12107516080141068, "rewards/margins": -0.03887636959552765, "rewards/rejected": -0.08219879120588303, "step": 70 }, { "epoch": 0.09, "learning_rate": 3.55e-05, "logits/chosen": -2.5108494758605957, "logits/rejected": -2.4832143783569336, "logps/chosen": -166.49508666992188, "logps/rejected": -157.7345428466797, "loss": 0.6918, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05453377217054367, "rewards/margins": 0.0069806561805307865, "rewards/rejected": -0.06151442974805832, "step": 71 }, { "epoch": 0.09, "learning_rate": 3.6e-05, "logits/chosen": -2.636711359024048, "logits/rejected": -2.6597518920898438, "logps/chosen": -174.1949005126953, "logps/rejected": -189.0026397705078, "loss": 0.6782, "rewards/accuracies": 0.625, "rewards/chosen": -0.005148457363247871, "rewards/margins": 0.03288703039288521, "rewards/rejected": -0.03803548216819763, "step": 72 }, { "epoch": 0.1, "learning_rate": 3.65e-05, "logits/chosen": -2.6395423412323, "logits/rejected": -2.6543948650360107, "logps/chosen": -146.5878448486328, "logps/rejected": -180.54176330566406, "loss": 0.7075, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0687551274895668, "rewards/margins": -0.02330932579934597, "rewards/rejected": -0.045445799827575684, "step": 73 }, { "epoch": 0.1, "learning_rate": 3.7e-05, "logits/chosen": -2.4257636070251465, "logits/rejected": -2.502183675765991, "logps/chosen": -179.7578125, "logps/rejected": -227.62875366210938, "loss": 0.7125, "rewards/accuracies": 0.1875, "rewards/chosen": -0.03154797852039337, "rewards/margins": -0.03330230712890625, "rewards/rejected": 0.0017543300054967403, "step": 74 }, { "epoch": 0.1, "learning_rate": 3.7500000000000003e-05, "logits/chosen": -2.3436474800109863, "logits/rejected": -2.498957633972168, "logps/chosen": -151.80540466308594, "logps/rejected": -164.84146118164062, "loss": 0.6777, "rewards/accuracies": 0.8125, "rewards/chosen": -0.017311906442046165, "rewards/margins": 0.03238987550139427, "rewards/rejected": -0.049701791256666183, "step": 75 }, { "epoch": 0.1, "learning_rate": 3.8e-05, "logits/chosen": -2.558389663696289, "logits/rejected": -2.6029064655303955, "logps/chosen": -176.9400177001953, "logps/rejected": -177.18336486816406, "loss": 0.7002, "rewards/accuracies": 0.5, "rewards/chosen": -0.04031562805175781, "rewards/margins": -0.009864617139101028, "rewards/rejected": -0.030451007187366486, "step": 76 }, { "epoch": 0.1, "learning_rate": 3.85e-05, "logits/chosen": -2.651350259780884, "logits/rejected": -2.7427544593811035, "logps/chosen": -210.8737335205078, "logps/rejected": -199.6597137451172, "loss": 0.6787, "rewards/accuracies": 0.5625, "rewards/chosen": -0.048238325864076614, "rewards/margins": 0.031043197959661484, "rewards/rejected": -0.0792815238237381, "step": 77 }, { "epoch": 0.1, "learning_rate": 3.9000000000000006e-05, "logits/chosen": -2.4323079586029053, "logits/rejected": -2.576862335205078, "logps/chosen": -127.96165466308594, "logps/rejected": -161.3166961669922, "loss": 0.724, "rewards/accuracies": 0.375, "rewards/chosen": -0.1004774421453476, "rewards/margins": -0.053969353437423706, "rewards/rejected": -0.046508073806762695, "step": 78 }, { "epoch": 0.1, "learning_rate": 3.9500000000000005e-05, "logits/chosen": -2.526383638381958, "logits/rejected": -2.552208423614502, "logps/chosen": -183.66246032714844, "logps/rejected": -195.2118377685547, "loss": 0.6934, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10480672121047974, "rewards/margins": 0.005925657227635384, "rewards/rejected": -0.11073236912488937, "step": 79 }, { "epoch": 0.1, "learning_rate": 4e-05, "logits/chosen": -2.635021209716797, "logits/rejected": -2.7062149047851562, "logps/chosen": -203.2865753173828, "logps/rejected": -200.7851104736328, "loss": 0.6553, "rewards/accuracies": 0.75, "rewards/chosen": -0.012298012152314186, "rewards/margins": 0.08196020871400833, "rewards/rejected": -0.09425821155309677, "step": 80 }, { "epoch": 0.11, "learning_rate": 4.05e-05, "logits/chosen": -2.48476505279541, "logits/rejected": -2.613018274307251, "logps/chosen": -159.0533905029297, "logps/rejected": -222.8763427734375, "loss": 0.6488, "rewards/accuracies": 0.6875, "rewards/chosen": -0.017749834805727005, "rewards/margins": 0.10052147507667542, "rewards/rejected": -0.11827130615711212, "step": 81 }, { "epoch": 0.11, "learning_rate": 4.1e-05, "logits/chosen": -2.5390264987945557, "logits/rejected": -2.5940589904785156, "logps/chosen": -220.54562377929688, "logps/rejected": -193.7894744873047, "loss": 0.7168, "rewards/accuracies": 0.375, "rewards/chosen": -0.15124498307704926, "rewards/margins": -0.04028485342860222, "rewards/rejected": -0.11096014082431793, "step": 82 }, { "epoch": 0.11, "learning_rate": 4.15e-05, "logits/chosen": -2.6519391536712646, "logits/rejected": -2.6670496463775635, "logps/chosen": -174.1206512451172, "logps/rejected": -182.67996215820312, "loss": 0.689, "rewards/accuracies": 0.5625, "rewards/chosen": -0.049321744590997696, "rewards/margins": 0.012697530910372734, "rewards/rejected": -0.06201927736401558, "step": 83 }, { "epoch": 0.11, "learning_rate": 4.2e-05, "logits/chosen": -2.717592239379883, "logits/rejected": -2.7927956581115723, "logps/chosen": -169.05294799804688, "logps/rejected": -190.73846435546875, "loss": 0.664, "rewards/accuracies": 0.625, "rewards/chosen": -0.03787894546985626, "rewards/margins": 0.06431596726179123, "rewards/rejected": -0.1021949052810669, "step": 84 }, { "epoch": 0.11, "learning_rate": 4.25e-05, "logits/chosen": -2.5930697917938232, "logits/rejected": -2.509345531463623, "logps/chosen": -179.2568817138672, "logps/rejected": -168.377685546875, "loss": 0.6953, "rewards/accuracies": 0.5, "rewards/chosen": -0.05612773820757866, "rewards/margins": 0.006718709133565426, "rewards/rejected": -0.06284645199775696, "step": 85 }, { "epoch": 0.11, "learning_rate": 4.3e-05, "logits/chosen": -2.5787692070007324, "logits/rejected": -2.64978289604187, "logps/chosen": -172.04168701171875, "logps/rejected": -160.20840454101562, "loss": 0.6896, "rewards/accuracies": 0.5, "rewards/chosen": -0.11709931492805481, "rewards/margins": 0.018632344901561737, "rewards/rejected": -0.13573165237903595, "step": 86 }, { "epoch": 0.11, "learning_rate": 4.35e-05, "logits/chosen": -2.480304002761841, "logits/rejected": -2.4922451972961426, "logps/chosen": -200.24014282226562, "logps/rejected": -218.20352172851562, "loss": 0.6417, "rewards/accuracies": 0.625, "rewards/chosen": -0.08723511546850204, "rewards/margins": 0.12010292708873749, "rewards/rejected": -0.20733806490898132, "step": 87 }, { "epoch": 0.12, "learning_rate": 4.4000000000000006e-05, "logits/chosen": -2.7480030059814453, "logits/rejected": -2.703220844268799, "logps/chosen": -164.8146514892578, "logps/rejected": -173.18063354492188, "loss": 0.7038, "rewards/accuracies": 0.375, "rewards/chosen": -0.1111496239900589, "rewards/margins": -0.00250411219894886, "rewards/rejected": -0.10864551365375519, "step": 88 }, { "epoch": 0.12, "learning_rate": 4.4500000000000004e-05, "logits/chosen": -2.4365036487579346, "logits/rejected": -2.5382070541381836, "logps/chosen": -173.4228515625, "logps/rejected": -228.0253448486328, "loss": 0.662, "rewards/accuracies": 0.6875, "rewards/chosen": -0.022041939198970795, "rewards/margins": 0.07041654735803604, "rewards/rejected": -0.09245848655700684, "step": 89 }, { "epoch": 0.12, "learning_rate": 4.5e-05, "logits/chosen": -2.5812180042266846, "logits/rejected": -2.6432223320007324, "logps/chosen": -155.36810302734375, "logps/rejected": -164.1707000732422, "loss": 0.7, "rewards/accuracies": 0.4375, "rewards/chosen": -0.2002555876970291, "rewards/margins": -0.011146757751703262, "rewards/rejected": -0.18910883367061615, "step": 90 }, { "epoch": 0.12, "learning_rate": 4.55e-05, "logits/chosen": -2.6511974334716797, "logits/rejected": -2.7204787731170654, "logps/chosen": -172.86270141601562, "logps/rejected": -176.37405395507812, "loss": 0.6919, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11853408813476562, "rewards/margins": 0.03132196143269539, "rewards/rejected": -0.14985604584217072, "step": 91 }, { "epoch": 0.12, "learning_rate": 4.600000000000001e-05, "logits/chosen": -2.753451108932495, "logits/rejected": -2.6007282733917236, "logps/chosen": -192.64649963378906, "logps/rejected": -164.47393798828125, "loss": 0.7061, "rewards/accuracies": 0.5625, "rewards/chosen": -0.26501375436782837, "rewards/margins": -0.007466696202754974, "rewards/rejected": -0.2575470507144928, "step": 92 }, { "epoch": 0.12, "learning_rate": 4.6500000000000005e-05, "logits/chosen": -2.6831321716308594, "logits/rejected": -2.657895088195801, "logps/chosen": -191.9042510986328, "logps/rejected": -165.17892456054688, "loss": 0.7455, "rewards/accuracies": 0.3125, "rewards/chosen": -0.21064424514770508, "rewards/margins": -0.08327949792146683, "rewards/rejected": -0.12736473977565765, "step": 93 }, { "epoch": 0.12, "learning_rate": 4.7e-05, "logits/chosen": -2.5191149711608887, "logits/rejected": -2.6359121799468994, "logps/chosen": -171.92013549804688, "logps/rejected": -177.6357421875, "loss": 0.7085, "rewards/accuracies": 0.5, "rewards/chosen": -0.274119108915329, "rewards/margins": -0.0034166108816862106, "rewards/rejected": -0.2707024812698364, "step": 94 }, { "epoch": 0.12, "learning_rate": 4.75e-05, "logits/chosen": -2.731271505355835, "logits/rejected": -2.7019336223602295, "logps/chosen": -180.59613037109375, "logps/rejected": -184.8212890625, "loss": 0.6521, "rewards/accuracies": 0.5, "rewards/chosen": -0.14632394909858704, "rewards/margins": 0.09543509036302567, "rewards/rejected": -0.2417590469121933, "step": 95 }, { "epoch": 0.13, "learning_rate": 4.8e-05, "logits/chosen": -2.490906000137329, "logits/rejected": -2.6088736057281494, "logps/chosen": -162.78256225585938, "logps/rejected": -176.74588012695312, "loss": 0.6336, "rewards/accuracies": 0.5625, "rewards/chosen": -0.14169403910636902, "rewards/margins": 0.14634834229946136, "rewards/rejected": -0.2880423963069916, "step": 96 }, { "epoch": 0.13, "learning_rate": 4.85e-05, "logits/chosen": -2.6869685649871826, "logits/rejected": -2.6618151664733887, "logps/chosen": -158.98098754882812, "logps/rejected": -165.8663330078125, "loss": 0.6656, "rewards/accuracies": 0.625, "rewards/chosen": -0.17300596833229065, "rewards/margins": 0.07225295156240463, "rewards/rejected": -0.24525892734527588, "step": 97 }, { "epoch": 0.13, "learning_rate": 4.9e-05, "logits/chosen": -2.655244827270508, "logits/rejected": -2.7159557342529297, "logps/chosen": -187.57041931152344, "logps/rejected": -189.180908203125, "loss": 0.6596, "rewards/accuracies": 0.625, "rewards/chosen": -0.16985346376895905, "rewards/margins": 0.0829225480556488, "rewards/rejected": -0.25277602672576904, "step": 98 }, { "epoch": 0.13, "learning_rate": 4.9500000000000004e-05, "logits/chosen": -2.5713696479797363, "logits/rejected": -2.619272470474243, "logps/chosen": -164.45220947265625, "logps/rejected": -189.011962890625, "loss": 0.6332, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1459609568119049, "rewards/margins": 0.14748626947402954, "rewards/rejected": -0.29344722628593445, "step": 99 }, { "epoch": 0.13, "learning_rate": 5e-05, "logits/chosen": -2.7377047538757324, "logits/rejected": -2.807655096054077, "logps/chosen": -212.2166748046875, "logps/rejected": -205.24362182617188, "loss": 0.7779, "rewards/accuracies": 0.3125, "rewards/chosen": -0.3374475836753845, "rewards/margins": -0.14080718159675598, "rewards/rejected": -0.19664038717746735, "step": 100 }, { "epoch": 0.13, "learning_rate": 4.999997432392803e-05, "logits/chosen": -2.604559898376465, "logits/rejected": -2.5846409797668457, "logps/chosen": -182.06011962890625, "logps/rejected": -185.51695251464844, "loss": 0.723, "rewards/accuracies": 0.3125, "rewards/chosen": -0.2137359231710434, "rewards/margins": -0.0511900931596756, "rewards/rejected": -0.1625458300113678, "step": 101 }, { "epoch": 0.13, "learning_rate": 4.9999897295764844e-05, "logits/chosen": -2.7066619396209717, "logits/rejected": -2.6727826595306396, "logps/chosen": -194.32232666015625, "logps/rejected": -219.6756134033203, "loss": 0.6905, "rewards/accuracies": 0.5, "rewards/chosen": -0.2863636910915375, "rewards/margins": 0.04078531265258789, "rewards/rejected": -0.32714903354644775, "step": 102 }, { "epoch": 0.13, "learning_rate": 4.9999768915668665e-05, "logits/chosen": -2.5064425468444824, "logits/rejected": -2.49991512298584, "logps/chosen": -168.347900390625, "logps/rejected": -165.95089721679688, "loss": 0.6714, "rewards/accuracies": 0.5, "rewards/chosen": -0.28736329078674316, "rewards/margins": 0.0547042116522789, "rewards/rejected": -0.3420674800872803, "step": 103 }, { "epoch": 0.14, "learning_rate": 4.999958918390321e-05, "logits/chosen": -2.568999767303467, "logits/rejected": -2.629004716873169, "logps/chosen": -186.33514404296875, "logps/rejected": -215.6304168701172, "loss": 0.6716, "rewards/accuracies": 0.5, "rewards/chosen": -0.23962916433811188, "rewards/margins": 0.06436805427074432, "rewards/rejected": -0.3039971888065338, "step": 104 }, { "epoch": 0.14, "learning_rate": 4.999935810083766e-05, "logits/chosen": -2.667313575744629, "logits/rejected": -2.6559674739837646, "logps/chosen": -154.65960693359375, "logps/rejected": -155.13522338867188, "loss": 0.667, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2854609489440918, "rewards/margins": 0.0853797197341919, "rewards/rejected": -0.3708406686782837, "step": 105 }, { "epoch": 0.14, "learning_rate": 4.999907566694667e-05, "logits/chosen": -2.5488526821136475, "logits/rejected": -2.485109567642212, "logps/chosen": -222.78338623046875, "logps/rejected": -222.6953887939453, "loss": 0.6656, "rewards/accuracies": 0.625, "rewards/chosen": -0.28172624111175537, "rewards/margins": 0.07234585285186768, "rewards/rejected": -0.35407203435897827, "step": 106 }, { "epoch": 0.14, "learning_rate": 4.9998741882810384e-05, "logits/chosen": -2.872877597808838, "logits/rejected": -2.8446714878082275, "logps/chosen": -182.39260864257812, "logps/rejected": -213.32289123535156, "loss": 0.6628, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2876412272453308, "rewards/margins": 0.08153553307056427, "rewards/rejected": -0.3691767454147339, "step": 107 }, { "epoch": 0.14, "learning_rate": 4.999835674911443e-05, "logits/chosen": -2.7004892826080322, "logits/rejected": -2.6362013816833496, "logps/chosen": -162.95108032226562, "logps/rejected": -161.67657470703125, "loss": 0.6744, "rewards/accuracies": 0.625, "rewards/chosen": -0.3595612049102783, "rewards/margins": 0.053390078246593475, "rewards/rejected": -0.4129512906074524, "step": 108 }, { "epoch": 0.14, "learning_rate": 4.999792026664991e-05, "logits/chosen": -2.8488755226135254, "logits/rejected": -2.914703845977783, "logps/chosen": -173.82363891601562, "logps/rejected": -182.16908264160156, "loss": 0.7065, "rewards/accuracies": 0.5625, "rewards/chosen": -0.48598194122314453, "rewards/margins": 0.03310241550207138, "rewards/rejected": -0.5190844535827637, "step": 109 }, { "epoch": 0.14, "learning_rate": 4.9997432436313384e-05, "logits/chosen": -2.5648086071014404, "logits/rejected": -2.5932183265686035, "logps/chosen": -185.70846557617188, "logps/rejected": -189.32725524902344, "loss": 0.6842, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4091907739639282, "rewards/margins": 0.04006391391158104, "rewards/rejected": -0.4492546617984772, "step": 110 }, { "epoch": 0.15, "learning_rate": 4.99968932591069e-05, "logits/chosen": -2.770993232727051, "logits/rejected": -2.7647171020507812, "logps/chosen": -208.0184326171875, "logps/rejected": -202.5152587890625, "loss": 0.6867, "rewards/accuracies": 0.5, "rewards/chosen": -0.49601614475250244, "rewards/margins": 0.027304889634251595, "rewards/rejected": -0.5233210921287537, "step": 111 }, { "epoch": 0.15, "learning_rate": 4.999630273613799e-05, "logits/chosen": -2.4182567596435547, "logits/rejected": -2.611553430557251, "logps/chosen": -138.6142578125, "logps/rejected": -187.48898315429688, "loss": 0.7333, "rewards/accuracies": 0.375, "rewards/chosen": -0.5143216252326965, "rewards/margins": -0.011710070073604584, "rewards/rejected": -0.5026116371154785, "step": 112 }, { "epoch": 0.15, "learning_rate": 4.999566086861961e-05, "logits/chosen": -2.7679452896118164, "logits/rejected": -2.8067147731781006, "logps/chosen": -186.18055725097656, "logps/rejected": -198.92605590820312, "loss": 0.8218, "rewards/accuracies": 0.1875, "rewards/chosen": -0.6542762517929077, "rewards/margins": -0.2115614116191864, "rewards/rejected": -0.4427148103713989, "step": 113 }, { "epoch": 0.15, "learning_rate": 4.999496765787024e-05, "logits/chosen": -2.842291831970215, "logits/rejected": -2.8023149967193604, "logps/chosen": -162.9304962158203, "logps/rejected": -182.9091796875, "loss": 0.6623, "rewards/accuracies": 0.5, "rewards/chosen": -0.5319749116897583, "rewards/margins": 0.08390979468822479, "rewards/rejected": -0.6158846616744995, "step": 114 }, { "epoch": 0.15, "learning_rate": 4.9994223105313774e-05, "logits/chosen": -2.980092763900757, "logits/rejected": -2.9631030559539795, "logps/chosen": -217.55894470214844, "logps/rejected": -226.1593780517578, "loss": 0.6824, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4032873809337616, "rewards/margins": 0.0588911734521389, "rewards/rejected": -0.4621785283088684, "step": 115 }, { "epoch": 0.15, "learning_rate": 4.9993427212479606e-05, "logits/chosen": -2.5391757488250732, "logits/rejected": -2.725529909133911, "logps/chosen": -179.54200744628906, "logps/rejected": -204.42666625976562, "loss": 0.6975, "rewards/accuracies": 0.375, "rewards/chosen": -0.39779067039489746, "rewards/margins": 0.0027880650013685226, "rewards/rejected": -0.40057870745658875, "step": 116 }, { "epoch": 0.15, "learning_rate": 4.999257998100254e-05, "logits/chosen": -2.733851671218872, "logits/rejected": -2.87308931350708, "logps/chosen": -176.08485412597656, "logps/rejected": -193.49484252929688, "loss": 0.6449, "rewards/accuracies": 0.625, "rewards/chosen": -0.40423309803009033, "rewards/margins": 0.11808924376964569, "rewards/rejected": -0.5223223567008972, "step": 117 }, { "epoch": 0.15, "learning_rate": 4.999168141262289e-05, "logits/chosen": -2.7447009086608887, "logits/rejected": -2.808476448059082, "logps/chosen": -224.22067260742188, "logps/rejected": -265.44964599609375, "loss": 0.543, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5002200603485107, "rewards/margins": 0.40809527039527893, "rewards/rejected": -0.9083153605461121, "step": 118 }, { "epoch": 0.16, "learning_rate": 4.9990731509186376e-05, "logits/chosen": -2.639643430709839, "logits/rejected": -2.7004880905151367, "logps/chosen": -126.65451049804688, "logps/rejected": -152.20460510253906, "loss": 0.6935, "rewards/accuracies": 0.375, "rewards/chosen": -0.5373567342758179, "rewards/margins": 0.048874713480472565, "rewards/rejected": -0.5862314105033875, "step": 119 }, { "epoch": 0.16, "learning_rate": 4.998973027264419e-05, "logits/chosen": -2.6515791416168213, "logits/rejected": -2.7365236282348633, "logps/chosen": -174.46641540527344, "logps/rejected": -225.31158447265625, "loss": 0.6869, "rewards/accuracies": 0.5, "rewards/chosen": -0.5558931827545166, "rewards/margins": 0.04842944070696831, "rewards/rejected": -0.6043226718902588, "step": 120 }, { "epoch": 0.16, "learning_rate": 4.998867770505295e-05, "logits/chosen": -2.724031686782837, "logits/rejected": -2.709028482437134, "logps/chosen": -166.08251953125, "logps/rejected": -183.7383575439453, "loss": 0.6879, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5829455256462097, "rewards/margins": 0.03844447806477547, "rewards/rejected": -0.6213899850845337, "step": 121 }, { "epoch": 0.16, "learning_rate": 4.9987573808574726e-05, "logits/chosen": -2.696485996246338, "logits/rejected": -2.779482364654541, "logps/chosen": -161.10256958007812, "logps/rejected": -185.7605743408203, "loss": 0.5904, "rewards/accuracies": 0.8125, "rewards/chosen": -0.41624942421913147, "rewards/margins": 0.23752669990062714, "rewards/rejected": -0.6537761092185974, "step": 122 }, { "epoch": 0.16, "learning_rate": 4.9986418585477016e-05, "logits/chosen": -2.7433488368988037, "logits/rejected": -2.8069894313812256, "logps/chosen": -160.246826171875, "logps/rejected": -171.8643798828125, "loss": 0.728, "rewards/accuracies": 0.5, "rewards/chosen": -0.4797200560569763, "rewards/margins": -0.032596245408058167, "rewards/rejected": -0.44712376594543457, "step": 123 }, { "epoch": 0.16, "learning_rate": 4.998521203813274e-05, "logits/chosen": -2.738048791885376, "logits/rejected": -2.747162342071533, "logps/chosen": -190.9744873046875, "logps/rejected": -181.24745178222656, "loss": 0.755, "rewards/accuracies": 0.375, "rewards/chosen": -0.5226253271102905, "rewards/margins": -0.04068867489695549, "rewards/rejected": -0.48193663358688354, "step": 124 }, { "epoch": 0.16, "learning_rate": 4.9983954169020256e-05, "logits/chosen": -2.6100339889526367, "logits/rejected": -2.634096145629883, "logps/chosen": -176.01905822753906, "logps/rejected": -161.22412109375, "loss": 0.748, "rewards/accuracies": 0.375, "rewards/chosen": -0.5368779301643372, "rewards/margins": -0.09010656177997589, "rewards/rejected": -0.4467713534832001, "step": 125 }, { "epoch": 0.16, "learning_rate": 4.9982644980723334e-05, "logits/chosen": -2.797285556793213, "logits/rejected": -2.825375556945801, "logps/chosen": -141.42039489746094, "logps/rejected": -148.95436096191406, "loss": 0.7302, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5232774615287781, "rewards/margins": -0.04633237421512604, "rewards/rejected": -0.47694510221481323, "step": 126 }, { "epoch": 0.17, "learning_rate": 4.998128447593117e-05, "logits/chosen": -2.6646504402160645, "logits/rejected": -2.7834081649780273, "logps/chosen": -230.68499755859375, "logps/rejected": -245.01414489746094, "loss": 0.661, "rewards/accuracies": 0.5625, "rewards/chosen": -0.687300980091095, "rewards/margins": 0.14017513394355774, "rewards/rejected": -0.8274761438369751, "step": 127 }, { "epoch": 0.17, "learning_rate": 4.997987265743834e-05, "logits/chosen": -2.775637149810791, "logits/rejected": -2.738879919052124, "logps/chosen": -166.31509399414062, "logps/rejected": -169.01321411132812, "loss": 0.6557, "rewards/accuracies": 0.625, "rewards/chosen": -0.4408762454986572, "rewards/margins": 0.11477227509021759, "rewards/rejected": -0.5556485652923584, "step": 128 }, { "epoch": 0.17, "learning_rate": 4.997840952814484e-05, "logits/chosen": -2.637038469314575, "logits/rejected": -2.666442394256592, "logps/chosen": -151.3613739013672, "logps/rejected": -160.8176727294922, "loss": 0.6752, "rewards/accuracies": 0.375, "rewards/chosen": -0.4970799684524536, "rewards/margins": 0.08162279427051544, "rewards/rejected": -0.5787028074264526, "step": 129 }, { "epoch": 0.17, "learning_rate": 4.9976895091056075e-05, "logits/chosen": -2.7471365928649902, "logits/rejected": -2.6176395416259766, "logps/chosen": -204.17808532714844, "logps/rejected": -224.61956787109375, "loss": 0.6923, "rewards/accuracies": 0.625, "rewards/chosen": -0.6104612946510315, "rewards/margins": 0.11507527530193329, "rewards/rejected": -0.725536584854126, "step": 130 }, { "epoch": 0.17, "learning_rate": 4.9975329349282826e-05, "logits/chosen": -2.7353034019470215, "logits/rejected": -2.743035078048706, "logps/chosen": -184.143798828125, "logps/rejected": -197.323974609375, "loss": 0.6711, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6166396141052246, "rewards/margins": 0.08574585616588593, "rewards/rejected": -0.7023855447769165, "step": 131 }, { "epoch": 0.17, "learning_rate": 4.9973712306041256e-05, "logits/chosen": -2.6548473834991455, "logits/rejected": -2.6762022972106934, "logps/chosen": -193.84849548339844, "logps/rejected": -175.150634765625, "loss": 0.7597, "rewards/accuracies": 0.375, "rewards/chosen": -0.6773683428764343, "rewards/margins": -0.10929510742425919, "rewards/rejected": -0.5680732131004333, "step": 132 }, { "epoch": 0.17, "learning_rate": 4.997204396465292e-05, "logits/chosen": -2.787050724029541, "logits/rejected": -2.777578353881836, "logps/chosen": -195.51785278320312, "logps/rejected": -190.51622009277344, "loss": 0.7073, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5759019255638123, "rewards/margins": 0.019099120050668716, "rewards/rejected": -0.5950011014938354, "step": 133 }, { "epoch": 0.18, "learning_rate": 4.997032432854472e-05, "logits/chosen": -2.6198296546936035, "logits/rejected": -2.661146640777588, "logps/chosen": -149.22142028808594, "logps/rejected": -173.4526824951172, "loss": 0.6885, "rewards/accuracies": 0.5, "rewards/chosen": -0.5247339010238647, "rewards/margins": 0.05187266319990158, "rewards/rejected": -0.5766065120697021, "step": 134 }, { "epoch": 0.18, "learning_rate": 4.996855340124894e-05, "logits/chosen": -2.637789487838745, "logits/rejected": -2.6017353534698486, "logps/chosen": -158.05914306640625, "logps/rejected": -175.1423797607422, "loss": 0.6985, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5851290822029114, "rewards/margins": 0.03735842555761337, "rewards/rejected": -0.6224875450134277, "step": 135 }, { "epoch": 0.18, "learning_rate": 4.996673118640323e-05, "logits/chosen": -2.37221360206604, "logits/rejected": -2.5038788318634033, "logps/chosen": -143.90518188476562, "logps/rejected": -222.84115600585938, "loss": 0.6539, "rewards/accuracies": 0.375, "rewards/chosen": -0.36046820878982544, "rewards/margins": 0.14497990906238556, "rewards/rejected": -0.5054481029510498, "step": 136 }, { "epoch": 0.18, "learning_rate": 4.996485768775055e-05, "logits/chosen": -2.807823419570923, "logits/rejected": -2.800899028778076, "logps/chosen": -169.72129821777344, "logps/rejected": -181.5267333984375, "loss": 0.6754, "rewards/accuracies": 0.5625, "rewards/chosen": -0.543328046798706, "rewards/margins": 0.11098619550466537, "rewards/rejected": -0.6543142199516296, "step": 137 }, { "epoch": 0.18, "learning_rate": 4.996293290913926e-05, "logits/chosen": -2.795060634613037, "logits/rejected": -2.9101216793060303, "logps/chosen": -136.24945068359375, "logps/rejected": -162.91119384765625, "loss": 0.61, "rewards/accuracies": 0.75, "rewards/chosen": -0.3900853395462036, "rewards/margins": 0.19977441430091858, "rewards/rejected": -0.5898597836494446, "step": 138 }, { "epoch": 0.18, "learning_rate": 4.9960956854522986e-05, "logits/chosen": -2.7642905712127686, "logits/rejected": -2.7773826122283936, "logps/chosen": -158.06378173828125, "logps/rejected": -221.30577087402344, "loss": 0.625, "rewards/accuracies": 0.625, "rewards/chosen": -0.4831012189388275, "rewards/margins": 0.18627440929412842, "rewards/rejected": -0.6693755984306335, "step": 139 }, { "epoch": 0.18, "learning_rate": 4.995892952796074e-05, "logits/chosen": -2.7154903411865234, "logits/rejected": -2.7624685764312744, "logps/chosen": -187.28146362304688, "logps/rejected": -189.83071899414062, "loss": 0.6819, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6895617842674255, "rewards/margins": 0.06184637174010277, "rewards/rejected": -0.7514082193374634, "step": 140 }, { "epoch": 0.18, "learning_rate": 4.995685093361682e-05, "logits/chosen": -2.7003986835479736, "logits/rejected": -2.754859209060669, "logps/chosen": -160.55992126464844, "logps/rejected": -172.67434692382812, "loss": 0.725, "rewards/accuracies": 0.5, "rewards/chosen": -0.6082602739334106, "rewards/margins": -0.01646682247519493, "rewards/rejected": -0.5917934775352478, "step": 141 }, { "epoch": 0.19, "learning_rate": 4.9954721075760824e-05, "logits/chosen": -2.7585508823394775, "logits/rejected": -2.7562849521636963, "logps/chosen": -186.20509338378906, "logps/rejected": -190.92132568359375, "loss": 0.6594, "rewards/accuracies": 0.625, "rewards/chosen": -0.5081749558448792, "rewards/margins": 0.10397283732891083, "rewards/rejected": -0.6121478080749512, "step": 142 }, { "epoch": 0.19, "learning_rate": 4.995253995876767e-05, "logits/chosen": -2.808187246322632, "logits/rejected": -2.869479179382324, "logps/chosen": -172.86203002929688, "logps/rejected": -175.89739990234375, "loss": 0.5562, "rewards/accuracies": 0.75, "rewards/chosen": -0.31780511140823364, "rewards/margins": 0.3512324392795563, "rewards/rejected": -0.6690375804901123, "step": 143 }, { "epoch": 0.19, "learning_rate": 4.995030758711756e-05, "logits/chosen": -2.9907169342041016, "logits/rejected": -2.968203544616699, "logps/chosen": -191.64285278320312, "logps/rejected": -177.73028564453125, "loss": 0.7513, "rewards/accuracies": 0.375, "rewards/chosen": -0.7068686485290527, "rewards/margins": -0.05182289704680443, "rewards/rejected": -0.6550456881523132, "step": 144 }, { "epoch": 0.19, "learning_rate": 4.994802396539598e-05, "logits/chosen": -2.8123016357421875, "logits/rejected": -2.8668291568756104, "logps/chosen": -172.08924865722656, "logps/rejected": -195.8844451904297, "loss": 0.696, "rewards/accuracies": 0.5, "rewards/chosen": -0.5609222054481506, "rewards/margins": 0.05493137985467911, "rewards/rejected": -0.6158535480499268, "step": 145 }, { "epoch": 0.19, "learning_rate": 4.994568909829368e-05, "logits/chosen": -2.892430305480957, "logits/rejected": -2.762629985809326, "logps/chosen": -216.95150756835938, "logps/rejected": -187.0184783935547, "loss": 0.7119, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7327225208282471, "rewards/margins": -0.021204425022006035, "rewards/rejected": -0.7115181684494019, "step": 146 }, { "epoch": 0.19, "learning_rate": 4.9943302990606684e-05, "logits/chosen": -2.7017452716827393, "logits/rejected": -2.7307963371276855, "logps/chosen": -198.5173797607422, "logps/rejected": -185.40316772460938, "loss": 0.6751, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6512372493743896, "rewards/margins": 0.10363547503948212, "rewards/rejected": -0.754872739315033, "step": 147 }, { "epoch": 0.19, "learning_rate": 4.994086564723626e-05, "logits/chosen": -2.835409641265869, "logits/rejected": -2.8388915061950684, "logps/chosen": -173.46127319335938, "logps/rejected": -185.3079376220703, "loss": 0.6937, "rewards/accuracies": 0.625, "rewards/chosen": -0.6533925533294678, "rewards/margins": 0.033594585955142975, "rewards/rejected": -0.6869871616363525, "step": 148 }, { "epoch": 0.2, "learning_rate": 4.9938377073188905e-05, "logits/chosen": -2.9569547176361084, "logits/rejected": -2.9091203212738037, "logps/chosen": -201.7881317138672, "logps/rejected": -180.14285278320312, "loss": 0.7056, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7249323725700378, "rewards/margins": 0.014494583010673523, "rewards/rejected": -0.7394269704818726, "step": 149 }, { "epoch": 0.2, "learning_rate": 4.993583727357638e-05, "logits/chosen": -2.6668853759765625, "logits/rejected": -2.694221258163452, "logps/chosen": -198.40594482421875, "logps/rejected": -201.02980041503906, "loss": 0.7579, "rewards/accuracies": 0.375, "rewards/chosen": -0.652782678604126, "rewards/margins": -0.08612270653247833, "rewards/rejected": -0.5666600465774536, "step": 150 }, { "epoch": 0.2, "learning_rate": 4.993324625361565e-05, "logits/chosen": -2.757725954055786, "logits/rejected": -2.7627735137939453, "logps/chosen": -200.16226196289062, "logps/rejected": -188.62083435058594, "loss": 0.6402, "rewards/accuracies": 0.75, "rewards/chosen": -0.44451814889907837, "rewards/margins": 0.1431855410337448, "rewards/rejected": -0.5877037048339844, "step": 151 }, { "epoch": 0.2, "learning_rate": 4.993060401862888e-05, "logits/chosen": -2.7355546951293945, "logits/rejected": -2.7701869010925293, "logps/chosen": -170.37046813964844, "logps/rejected": -182.28440856933594, "loss": 0.6787, "rewards/accuracies": 0.5, "rewards/chosen": -0.5462682247161865, "rewards/margins": 0.08645598590373993, "rewards/rejected": -0.6327242255210876, "step": 152 }, { "epoch": 0.2, "learning_rate": 4.9927910574043465e-05, "logits/chosen": -2.893017530441284, "logits/rejected": -2.9069924354553223, "logps/chosen": -215.6539764404297, "logps/rejected": -256.93194580078125, "loss": 0.642, "rewards/accuracies": 0.625, "rewards/chosen": -0.635912299156189, "rewards/margins": 0.1641692817211151, "rewards/rejected": -0.8000816702842712, "step": 153 }, { "epoch": 0.2, "learning_rate": 4.992516592539196e-05, "logits/chosen": -2.544395685195923, "logits/rejected": -2.563568353652954, "logps/chosen": -144.60275268554688, "logps/rejected": -150.6583709716797, "loss": 0.6613, "rewards/accuracies": 0.625, "rewards/chosen": -0.40625688433647156, "rewards/margins": 0.11070521920919418, "rewards/rejected": -0.5169621706008911, "step": 154 }, { "epoch": 0.2, "learning_rate": 4.9922370078312105e-05, "logits/chosen": -2.6519908905029297, "logits/rejected": -2.6390066146850586, "logps/chosen": -187.1649932861328, "logps/rejected": -163.97708129882812, "loss": 0.6462, "rewards/accuracies": 0.625, "rewards/chosen": -0.4761255383491516, "rewards/margins": 0.15067748725414276, "rewards/rejected": -0.6268030405044556, "step": 155 }, { "epoch": 0.2, "learning_rate": 4.991952303854682e-05, "logits/chosen": -2.823216676712036, "logits/rejected": -2.8248276710510254, "logps/chosen": -171.64088439941406, "logps/rejected": -205.40994262695312, "loss": 0.5695, "rewards/accuracies": 0.6875, "rewards/chosen": -0.49252718687057495, "rewards/margins": 0.3139771521091461, "rewards/rejected": -0.8065043687820435, "step": 156 }, { "epoch": 0.21, "learning_rate": 4.9916624811944175e-05, "logits/chosen": -2.6669604778289795, "logits/rejected": -2.720827102661133, "logps/chosen": -145.63650512695312, "logps/rejected": -143.77822875976562, "loss": 0.6691, "rewards/accuracies": 0.625, "rewards/chosen": -0.4779016673564911, "rewards/margins": 0.07923712581396103, "rewards/rejected": -0.5571387410163879, "step": 157 }, { "epoch": 0.21, "learning_rate": 4.991367540445735e-05, "logits/chosen": -2.780358076095581, "logits/rejected": -2.7683119773864746, "logps/chosen": -163.51113891601562, "logps/rejected": -147.0047149658203, "loss": 0.7295, "rewards/accuracies": 0.375, "rewards/chosen": -0.6608467698097229, "rewards/margins": -0.023487316444516182, "rewards/rejected": -0.6373594403266907, "step": 158 }, { "epoch": 0.21, "learning_rate": 4.991067482214471e-05, "logits/chosen": -2.660963296890259, "logits/rejected": -2.6401174068450928, "logps/chosen": -173.09176635742188, "logps/rejected": -171.8109130859375, "loss": 0.7108, "rewards/accuracies": 0.5, "rewards/chosen": -0.6182070970535278, "rewards/margins": -0.01903488114476204, "rewards/rejected": -0.5991722345352173, "step": 159 }, { "epoch": 0.21, "learning_rate": 4.9907623071169686e-05, "logits/chosen": -2.7048563957214355, "logits/rejected": -2.5354063510894775, "logps/chosen": -228.5687255859375, "logps/rejected": -184.3094482421875, "loss": 0.7866, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7742022275924683, "rewards/margins": -0.1047876849770546, "rewards/rejected": -0.6694144606590271, "step": 160 }, { "epoch": 0.21, "learning_rate": 4.990452015780085e-05, "logits/chosen": -2.678699016571045, "logits/rejected": -2.6732139587402344, "logps/chosen": -217.17123413085938, "logps/rejected": -211.92489624023438, "loss": 0.8004, "rewards/accuracies": 0.375, "rewards/chosen": -0.7689281702041626, "rewards/margins": -0.17873500287532806, "rewards/rejected": -0.5901932120323181, "step": 161 }, { "epoch": 0.21, "learning_rate": 4.9901366088411846e-05, "logits/chosen": -2.650327444076538, "logits/rejected": -2.6380615234375, "logps/chosen": -160.45660400390625, "logps/rejected": -149.87843322753906, "loss": 0.7334, "rewards/accuracies": 0.5, "rewards/chosen": -0.4278574585914612, "rewards/margins": -0.050989780575037, "rewards/rejected": -0.3768676817417145, "step": 162 }, { "epoch": 0.21, "learning_rate": 4.98981608694814e-05, "logits/chosen": -2.636261463165283, "logits/rejected": -2.6120524406433105, "logps/chosen": -189.56497192382812, "logps/rejected": -184.8382568359375, "loss": 0.7106, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6862293481826782, "rewards/margins": 0.02107788249850273, "rewards/rejected": -0.7073072791099548, "step": 163 }, { "epoch": 0.21, "learning_rate": 4.9894904507593316e-05, "logits/chosen": -2.681283712387085, "logits/rejected": -2.6194002628326416, "logps/chosen": -158.73968505859375, "logps/rejected": -180.73330688476562, "loss": 0.6498, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4341495633125305, "rewards/margins": 0.13567671179771423, "rewards/rejected": -0.5698262453079224, "step": 164 }, { "epoch": 0.22, "learning_rate": 4.989159700943643e-05, "logits/chosen": -2.822274923324585, "logits/rejected": -2.8219101428985596, "logps/chosen": -182.0315704345703, "logps/rejected": -193.2274169921875, "loss": 0.7126, "rewards/accuracies": 0.5, "rewards/chosen": -0.6531980037689209, "rewards/margins": 0.01201358437538147, "rewards/rejected": -0.6652116179466248, "step": 165 }, { "epoch": 0.22, "learning_rate": 4.988823838180464e-05, "logits/chosen": -2.804276943206787, "logits/rejected": -2.8529317378997803, "logps/chosen": -183.54083251953125, "logps/rejected": -199.4310302734375, "loss": 0.6961, "rewards/accuracies": 0.5, "rewards/chosen": -0.5883633494377136, "rewards/margins": 0.03549729287624359, "rewards/rejected": -0.6238605976104736, "step": 166 }, { "epoch": 0.22, "learning_rate": 4.988482863159684e-05, "logits/chosen": -2.7629952430725098, "logits/rejected": -2.852982759475708, "logps/chosen": -223.6051483154297, "logps/rejected": -215.63427734375, "loss": 0.605, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5412927865982056, "rewards/margins": 0.22360366582870483, "rewards/rejected": -0.7648964524269104, "step": 167 }, { "epoch": 0.22, "learning_rate": 4.988136776581696e-05, "logits/chosen": -2.696824789047241, "logits/rejected": -2.7131996154785156, "logps/chosen": -161.2986297607422, "logps/rejected": -180.24172973632812, "loss": 0.6756, "rewards/accuracies": 0.5625, "rewards/chosen": -0.47621679306030273, "rewards/margins": 0.07477270066738129, "rewards/rejected": -0.5509894490242004, "step": 168 }, { "epoch": 0.22, "learning_rate": 4.9877855791573915e-05, "logits/chosen": -2.5992307662963867, "logits/rejected": -2.5558767318725586, "logps/chosen": -177.31790161132812, "logps/rejected": -173.00013732910156, "loss": 0.7614, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6591533422470093, "rewards/margins": -0.10336636006832123, "rewards/rejected": -0.5557870268821716, "step": 169 }, { "epoch": 0.22, "learning_rate": 4.9874292716081595e-05, "logits/chosen": -2.480238914489746, "logits/rejected": -2.531926155090332, "logps/chosen": -173.81201171875, "logps/rejected": -177.76727294921875, "loss": 0.6506, "rewards/accuracies": 0.625, "rewards/chosen": -0.4206813871860504, "rewards/margins": 0.10889497399330139, "rewards/rejected": -0.5295763611793518, "step": 170 }, { "epoch": 0.22, "learning_rate": 4.9870678546658865e-05, "logits/chosen": -2.68393611907959, "logits/rejected": -2.8312528133392334, "logps/chosen": -238.86911010742188, "logps/rejected": -268.19732666015625, "loss": 0.6572, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5521610975265503, "rewards/margins": 0.11217445880174637, "rewards/rejected": -0.6643356084823608, "step": 171 }, { "epoch": 0.23, "learning_rate": 4.9867013290729535e-05, "logits/chosen": -2.580007314682007, "logits/rejected": -2.5705273151397705, "logps/chosen": -165.80308532714844, "logps/rejected": -203.9613800048828, "loss": 0.7206, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6492197513580322, "rewards/margins": 0.02221706137061119, "rewards/rejected": -0.6714367866516113, "step": 172 }, { "epoch": 0.23, "learning_rate": 4.986329695582237e-05, "logits/chosen": -2.7853593826293945, "logits/rejected": -2.7307794094085693, "logps/chosen": -211.93991088867188, "logps/rejected": -200.86334228515625, "loss": 0.7051, "rewards/accuracies": 0.5, "rewards/chosen": -0.5223960876464844, "rewards/margins": 0.0009156223386526108, "rewards/rejected": -0.5233116745948792, "step": 173 }, { "epoch": 0.23, "learning_rate": 4.985952954957103e-05, "logits/chosen": -2.6804401874542236, "logits/rejected": -2.6449456214904785, "logps/chosen": -187.6370391845703, "logps/rejected": -193.33245849609375, "loss": 0.6809, "rewards/accuracies": 0.5625, "rewards/chosen": -0.513522207736969, "rewards/margins": 0.04058818519115448, "rewards/rejected": -0.5541103482246399, "step": 174 }, { "epoch": 0.23, "learning_rate": 4.985571107971408e-05, "logits/chosen": -2.622426986694336, "logits/rejected": -2.618734836578369, "logps/chosen": -152.2515869140625, "logps/rejected": -168.34747314453125, "loss": 0.7282, "rewards/accuracies": 0.625, "rewards/chosen": -0.5195332765579224, "rewards/margins": -0.02337510883808136, "rewards/rejected": -0.4961581528186798, "step": 175 }, { "epoch": 0.23, "learning_rate": 4.9851841554095e-05, "logits/chosen": -2.6712746620178223, "logits/rejected": -2.6609811782836914, "logps/chosen": -198.9906005859375, "logps/rejected": -164.44154357910156, "loss": 0.7105, "rewards/accuracies": 0.625, "rewards/chosen": -0.4613041877746582, "rewards/margins": -0.0029089637100696564, "rewards/rejected": -0.45839521288871765, "step": 176 }, { "epoch": 0.23, "learning_rate": 4.9847920980662134e-05, "logits/chosen": -2.6356289386749268, "logits/rejected": -2.6573214530944824, "logps/chosen": -175.52487182617188, "logps/rejected": -187.29832458496094, "loss": 0.6474, "rewards/accuracies": 0.625, "rewards/chosen": -0.40463200211524963, "rewards/margins": 0.12262441217899323, "rewards/rejected": -0.527256429195404, "step": 177 }, { "epoch": 0.23, "learning_rate": 4.984394936746865e-05, "logits/chosen": -2.357494354248047, "logits/rejected": -2.411952018737793, "logps/chosen": -139.59608459472656, "logps/rejected": -156.5337371826172, "loss": 0.679, "rewards/accuracies": 0.625, "rewards/chosen": -0.4118153154850006, "rewards/margins": 0.04770222678780556, "rewards/rejected": -0.45951756834983826, "step": 178 }, { "epoch": 0.23, "learning_rate": 4.98399267226726e-05, "logits/chosen": -2.5636932849884033, "logits/rejected": -2.6556129455566406, "logps/chosen": -175.670166015625, "logps/rejected": -179.19627380371094, "loss": 0.6572, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6697508692741394, "rewards/margins": 0.1040661484003067, "rewards/rejected": -0.7738169431686401, "step": 179 }, { "epoch": 0.24, "learning_rate": 4.9835853054536846e-05, "logits/chosen": -2.5892560482025146, "logits/rejected": -2.579235315322876, "logps/chosen": -168.14564514160156, "logps/rejected": -163.5547332763672, "loss": 0.624, "rewards/accuracies": 0.75, "rewards/chosen": -0.5034769177436829, "rewards/margins": 0.1633673459291458, "rewards/rejected": -0.6668442487716675, "step": 180 }, { "epoch": 0.24, "learning_rate": 4.9831728371429046e-05, "logits/chosen": -2.5526325702667236, "logits/rejected": -2.602790355682373, "logps/chosen": -167.693115234375, "logps/rejected": -191.10301208496094, "loss": 0.6836, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5507184267044067, "rewards/margins": 0.07049673795700073, "rewards/rejected": -0.6212151646614075, "step": 181 }, { "epoch": 0.24, "learning_rate": 4.982755268182164e-05, "logits/chosen": -2.581120729446411, "logits/rejected": -2.61881947517395, "logps/chosen": -176.85264587402344, "logps/rejected": -202.72483825683594, "loss": 0.6354, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6271121501922607, "rewards/margins": 0.17561408877372742, "rewards/rejected": -0.8027262687683105, "step": 182 }, { "epoch": 0.24, "learning_rate": 4.982332599429187e-05, "logits/chosen": -2.5083706378936768, "logits/rejected": -2.5868587493896484, "logps/chosen": -145.9921112060547, "logps/rejected": -150.42713928222656, "loss": 0.6945, "rewards/accuracies": 0.4375, "rewards/chosen": -0.45637643337249756, "rewards/margins": 0.06362758576869965, "rewards/rejected": -0.5200040340423584, "step": 183 }, { "epoch": 0.24, "learning_rate": 4.981904831752171e-05, "logits/chosen": -2.563215970993042, "logits/rejected": -2.6187920570373535, "logps/chosen": -147.48265075683594, "logps/rejected": -155.67405700683594, "loss": 0.7287, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5919477939605713, "rewards/margins": -0.029735613614320755, "rewards/rejected": -0.5622121691703796, "step": 184 }, { "epoch": 0.24, "learning_rate": 4.981471966029787e-05, "logits/chosen": -2.447539806365967, "logits/rejected": -2.4295990467071533, "logps/chosen": -153.93881225585938, "logps/rejected": -169.62408447265625, "loss": 0.6502, "rewards/accuracies": 0.5, "rewards/chosen": -0.6545721292495728, "rewards/margins": 0.11100000143051147, "rewards/rejected": -0.7655720710754395, "step": 185 }, { "epoch": 0.24, "learning_rate": 4.981034003151178e-05, "logits/chosen": -2.4045794010162354, "logits/rejected": -2.446890354156494, "logps/chosen": -134.2223358154297, "logps/rejected": -149.72105407714844, "loss": 0.6335, "rewards/accuracies": 0.5625, "rewards/chosen": -0.46206632256507874, "rewards/margins": 0.19417575001716614, "rewards/rejected": -0.6562421321868896, "step": 186 }, { "epoch": 0.24, "learning_rate": 4.980590944015958e-05, "logits/chosen": -2.68265700340271, "logits/rejected": -2.667114496231079, "logps/chosen": -167.9902801513672, "logps/rejected": -171.3512420654297, "loss": 0.6734, "rewards/accuracies": 0.5, "rewards/chosen": -0.5563911199569702, "rewards/margins": 0.08469439297914505, "rewards/rejected": -0.6410855054855347, "step": 187 }, { "epoch": 0.25, "learning_rate": 4.98014278953421e-05, "logits/chosen": -2.5728375911712646, "logits/rejected": -2.681403875350952, "logps/chosen": -159.7633056640625, "logps/rejected": -211.886962890625, "loss": 0.6439, "rewards/accuracies": 0.625, "rewards/chosen": -0.531548023223877, "rewards/margins": 0.1921529322862625, "rewards/rejected": -0.7237009406089783, "step": 188 }, { "epoch": 0.25, "learning_rate": 4.979689540626479e-05, "logits/chosen": -2.324286937713623, "logits/rejected": -2.453277349472046, "logps/chosen": -168.53738403320312, "logps/rejected": -180.93719482421875, "loss": 0.6538, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4037819802761078, "rewards/margins": 0.17486746609210968, "rewards/rejected": -0.5786494016647339, "step": 189 }, { "epoch": 0.25, "learning_rate": 4.9792311982237774e-05, "logits/chosen": -2.773432493209839, "logits/rejected": -2.74585223197937, "logps/chosen": -157.39044189453125, "logps/rejected": -167.32725524902344, "loss": 0.6255, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5002456307411194, "rewards/margins": 0.22034718096256256, "rewards/rejected": -0.7205928564071655, "step": 190 }, { "epoch": 0.25, "learning_rate": 4.9787677632675825e-05, "logits/chosen": -2.6729888916015625, "logits/rejected": -2.7148032188415527, "logps/chosen": -162.77774047851562, "logps/rejected": -221.85386657714844, "loss": 0.6881, "rewards/accuracies": 0.625, "rewards/chosen": -0.6836182475090027, "rewards/margins": 0.14588770270347595, "rewards/rejected": -0.829505980014801, "step": 191 }, { "epoch": 0.25, "learning_rate": 4.978299236709826e-05, "logits/chosen": -2.556713581085205, "logits/rejected": -2.5743775367736816, "logps/chosen": -197.9322052001953, "logps/rejected": -203.74917602539062, "loss": 0.6874, "rewards/accuracies": 0.5, "rewards/chosen": -0.7575306296348572, "rewards/margins": 0.05150505527853966, "rewards/rejected": -0.80903559923172, "step": 192 }, { "epoch": 0.25, "learning_rate": 4.977825619512904e-05, "logits/chosen": -2.390803813934326, "logits/rejected": -2.551340341567993, "logps/chosen": -152.84097290039062, "logps/rejected": -192.7227325439453, "loss": 0.7108, "rewards/accuracies": 0.5, "rewards/chosen": -0.5909304618835449, "rewards/margins": -0.009836459532380104, "rewards/rejected": -0.581093966960907, "step": 193 }, { "epoch": 0.25, "learning_rate": 4.977346912649666e-05, "logits/chosen": -2.451486587524414, "logits/rejected": -2.423938751220703, "logps/chosen": -208.7131805419922, "logps/rejected": -195.0940399169922, "loss": 0.7075, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5515446066856384, "rewards/margins": 0.014907769858837128, "rewards/rejected": -0.5664523839950562, "step": 194 }, { "epoch": 0.26, "learning_rate": 4.9768631171034175e-05, "logits/chosen": -2.4102437496185303, "logits/rejected": -2.524508237838745, "logps/chosen": -171.2530975341797, "logps/rejected": -197.77008056640625, "loss": 0.6437, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6411187648773193, "rewards/margins": 0.20387138426303864, "rewards/rejected": -0.8449901342391968, "step": 195 }, { "epoch": 0.26, "learning_rate": 4.9763742338679145e-05, "logits/chosen": -2.6650915145874023, "logits/rejected": -2.5682055950164795, "logps/chosen": -280.52142333984375, "logps/rejected": -250.82424926757812, "loss": 0.6956, "rewards/accuracies": 0.5, "rewards/chosen": -0.7708845138549805, "rewards/margins": 0.03139163926243782, "rewards/rejected": -0.8022761344909668, "step": 196 }, { "epoch": 0.26, "learning_rate": 4.975880263947367e-05, "logits/chosen": -2.66872239112854, "logits/rejected": -2.6272640228271484, "logps/chosen": -206.67318725585938, "logps/rejected": -169.04757690429688, "loss": 0.725, "rewards/accuracies": 0.5, "rewards/chosen": -0.7810104489326477, "rewards/margins": -0.02610369399189949, "rewards/rejected": -0.7549068331718445, "step": 197 }, { "epoch": 0.26, "learning_rate": 4.9753812083564304e-05, "logits/chosen": -2.4464945793151855, "logits/rejected": -2.4812588691711426, "logps/chosen": -153.33660888671875, "logps/rejected": -176.61399841308594, "loss": 0.6034, "rewards/accuracies": 0.5625, "rewards/chosen": -0.44514474272727966, "rewards/margins": 0.24550259113311768, "rewards/rejected": -0.6906473636627197, "step": 198 }, { "epoch": 0.26, "learning_rate": 4.974877068120208e-05, "logits/chosen": -2.635669231414795, "logits/rejected": -2.648510456085205, "logps/chosen": -182.48388671875, "logps/rejected": -196.19873046875, "loss": 0.6649, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6389026641845703, "rewards/margins": 0.10669447481632233, "rewards/rejected": -0.7455971240997314, "step": 199 }, { "epoch": 0.26, "learning_rate": 4.974367844274248e-05, "logits/chosen": -2.5759198665618896, "logits/rejected": -2.723337173461914, "logps/chosen": -179.0137939453125, "logps/rejected": -255.97052001953125, "loss": 0.6338, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4965980052947998, "rewards/margins": 0.18533286452293396, "rewards/rejected": -0.6819308996200562, "step": 200 }, { "epoch": 0.26, "learning_rate": 4.973853537864538e-05, "logits/chosen": -2.7438008785247803, "logits/rejected": -2.8121700286865234, "logps/chosen": -160.9208221435547, "logps/rejected": -169.40225219726562, "loss": 0.6689, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6913070678710938, "rewards/margins": 0.10465458035469055, "rewards/rejected": -0.7959617376327515, "step": 201 }, { "epoch": 0.26, "learning_rate": 4.973334149947508e-05, "logits/chosen": -2.70800518989563, "logits/rejected": -2.6374926567077637, "logps/chosen": -183.4274444580078, "logps/rejected": -180.7521209716797, "loss": 0.6144, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7033702731132507, "rewards/margins": 0.2224593162536621, "rewards/rejected": -0.9258295893669128, "step": 202 }, { "epoch": 0.27, "learning_rate": 4.972809681590026e-05, "logits/chosen": -2.66047739982605, "logits/rejected": -2.710866928100586, "logps/chosen": -194.5672607421875, "logps/rejected": -210.85208129882812, "loss": 0.655, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8557997345924377, "rewards/margins": 0.16004985570907593, "rewards/rejected": -1.0158495903015137, "step": 203 }, { "epoch": 0.27, "learning_rate": 4.972280133869396e-05, "logits/chosen": -2.433838129043579, "logits/rejected": -2.564758539199829, "logps/chosen": -171.2584686279297, "logps/rejected": -213.25494384765625, "loss": 0.5993, "rewards/accuracies": 0.6875, "rewards/chosen": -0.43924885988235474, "rewards/margins": 0.2442169040441513, "rewards/rejected": -0.6834657788276672, "step": 204 }, { "epoch": 0.27, "learning_rate": 4.971745507873352e-05, "logits/chosen": -2.681500196456909, "logits/rejected": -2.6873316764831543, "logps/chosen": -150.2528839111328, "logps/rejected": -154.3059844970703, "loss": 0.627, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7566659450531006, "rewards/margins": 0.23124736547470093, "rewards/rejected": -0.9879133701324463, "step": 205 }, { "epoch": 0.27, "learning_rate": 4.971205804700063e-05, "logits/chosen": -2.464470624923706, "logits/rejected": -2.3958442211151123, "logps/chosen": -293.574462890625, "logps/rejected": -252.32489013671875, "loss": 0.6749, "rewards/accuracies": 0.5, "rewards/chosen": -0.5428895354270935, "rewards/margins": 0.14122185111045837, "rewards/rejected": -0.6841113567352295, "step": 206 }, { "epoch": 0.27, "learning_rate": 4.970661025458125e-05, "logits/chosen": -2.5775954723358154, "logits/rejected": -2.6041054725646973, "logps/chosen": -170.66627502441406, "logps/rejected": -163.29644775390625, "loss": 0.721, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9231572151184082, "rewards/margins": 0.11258751899003983, "rewards/rejected": -1.0357446670532227, "step": 207 }, { "epoch": 0.27, "learning_rate": 4.9701111712665625e-05, "logits/chosen": -2.6646294593811035, "logits/rejected": -2.7594456672668457, "logps/chosen": -200.36981201171875, "logps/rejected": -186.59059143066406, "loss": 0.7514, "rewards/accuracies": 0.4375, "rewards/chosen": -1.3253411054611206, "rewards/margins": -0.0334821492433548, "rewards/rejected": -1.2918590307235718, "step": 208 }, { "epoch": 0.27, "learning_rate": 4.969556243254822e-05, "logits/chosen": -2.5144646167755127, "logits/rejected": -2.594902992248535, "logps/chosen": -147.14231872558594, "logps/rejected": -176.36328125, "loss": 0.6548, "rewards/accuracies": 0.75, "rewards/chosen": -0.6609407067298889, "rewards/margins": 0.12413067370653152, "rewards/rejected": -0.7850713729858398, "step": 209 }, { "epoch": 0.27, "learning_rate": 4.968996242562774e-05, "logits/chosen": -2.6077287197113037, "logits/rejected": -2.6607818603515625, "logps/chosen": -199.4670867919922, "logps/rejected": -201.5868377685547, "loss": 0.6797, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8754231333732605, "rewards/margins": 0.13014619052410126, "rewards/rejected": -1.005569338798523, "step": 210 }, { "epoch": 0.28, "learning_rate": 4.968431170340706e-05, "logits/chosen": -2.740494966506958, "logits/rejected": -2.620009660720825, "logps/chosen": -210.96929931640625, "logps/rejected": -204.82090759277344, "loss": 0.7721, "rewards/accuracies": 0.5, "rewards/chosen": -1.2561805248260498, "rewards/margins": -0.05324437841773033, "rewards/rejected": -1.2029361724853516, "step": 211 }, { "epoch": 0.28, "learning_rate": 4.9678610277493275e-05, "logits/chosen": -2.6105682849884033, "logits/rejected": -2.5892579555511475, "logps/chosen": -198.35635375976562, "logps/rejected": -207.89016723632812, "loss": 0.6961, "rewards/accuracies": 0.5625, "rewards/chosen": -0.989006757736206, "rewards/margins": 0.060812097042798996, "rewards/rejected": -1.0498188734054565, "step": 212 }, { "epoch": 0.28, "learning_rate": 4.967285815959759e-05, "logits/chosen": -2.714409589767456, "logits/rejected": -2.7895750999450684, "logps/chosen": -208.87754821777344, "logps/rejected": -222.0183563232422, "loss": 0.57, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8426048159599304, "rewards/margins": 0.3045133650302887, "rewards/rejected": -1.1471182107925415, "step": 213 }, { "epoch": 0.28, "learning_rate": 4.9667055361535354e-05, "logits/chosen": -2.748204231262207, "logits/rejected": -2.832871675491333, "logps/chosen": -201.07078552246094, "logps/rejected": -212.4540252685547, "loss": 0.6954, "rewards/accuracies": 0.5625, "rewards/chosen": -1.2435117959976196, "rewards/margins": 0.14082738757133484, "rewards/rejected": -1.384339451789856, "step": 214 }, { "epoch": 0.28, "learning_rate": 4.9661201895226e-05, "logits/chosen": -2.7127251625061035, "logits/rejected": -2.751798629760742, "logps/chosen": -220.7108154296875, "logps/rejected": -195.08290100097656, "loss": 0.6729, "rewards/accuracies": 0.5, "rewards/chosen": -0.7872539758682251, "rewards/margins": 0.12220478057861328, "rewards/rejected": -0.9094586968421936, "step": 215 }, { "epoch": 0.28, "learning_rate": 4.965529777269306e-05, "logits/chosen": -2.6204776763916016, "logits/rejected": -2.664301872253418, "logps/chosen": -166.66172790527344, "logps/rejected": -205.79847717285156, "loss": 0.7922, "rewards/accuracies": 0.5, "rewards/chosen": -0.9887948036193848, "rewards/margins": -0.06140782684087753, "rewards/rejected": -0.927386999130249, "step": 216 }, { "epoch": 0.28, "learning_rate": 4.964934300606411e-05, "logits/chosen": -2.48382830619812, "logits/rejected": -2.493025541305542, "logps/chosen": -169.9669189453125, "logps/rejected": -181.6856689453125, "loss": 0.7197, "rewards/accuracies": 0.75, "rewards/chosen": -0.6650858521461487, "rewards/margins": 0.017848990857601166, "rewards/rejected": -0.6829348802566528, "step": 217 }, { "epoch": 0.29, "learning_rate": 4.964333760757074e-05, "logits/chosen": -2.648463726043701, "logits/rejected": -2.6576759815216064, "logps/chosen": -178.34747314453125, "logps/rejected": -188.2498779296875, "loss": 0.6239, "rewards/accuracies": 0.6875, "rewards/chosen": -0.95872962474823, "rewards/margins": 0.26567110419273376, "rewards/rejected": -1.2244007587432861, "step": 218 }, { "epoch": 0.29, "learning_rate": 4.963728158954856e-05, "logits/chosen": -2.9130921363830566, "logits/rejected": -2.894216299057007, "logps/chosen": -221.04931640625, "logps/rejected": -237.8624725341797, "loss": 0.6594, "rewards/accuracies": 0.5625, "rewards/chosen": -1.084302306175232, "rewards/margins": 0.13000546395778656, "rewards/rejected": -1.2143077850341797, "step": 219 }, { "epoch": 0.29, "learning_rate": 4.963117496443715e-05, "logits/chosen": -2.68157958984375, "logits/rejected": -2.8279314041137695, "logps/chosen": -165.5657196044922, "logps/rejected": -202.75865173339844, "loss": 0.6575, "rewards/accuracies": 0.625, "rewards/chosen": -0.7596678733825684, "rewards/margins": 0.2009599506855011, "rewards/rejected": -0.9606277942657471, "step": 220 }, { "epoch": 0.29, "learning_rate": 4.9625017744780045e-05, "logits/chosen": -2.614312171936035, "logits/rejected": -2.6585357189178467, "logps/chosen": -216.45230102539062, "logps/rejected": -195.06643676757812, "loss": 0.7323, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9276168346405029, "rewards/margins": -0.02379007264971733, "rewards/rejected": -0.9038268327713013, "step": 221 }, { "epoch": 0.29, "learning_rate": 4.96188099432247e-05, "logits/chosen": -2.7345945835113525, "logits/rejected": -2.702479362487793, "logps/chosen": -227.8175048828125, "logps/rejected": -238.62513732910156, "loss": 0.6852, "rewards/accuracies": 0.5, "rewards/chosen": -0.9885910749435425, "rewards/margins": 0.07757923752069473, "rewards/rejected": -1.0661702156066895, "step": 222 }, { "epoch": 0.29, "learning_rate": 4.9612551572522464e-05, "logits/chosen": -2.6463451385498047, "logits/rejected": -2.6843183040618896, "logps/chosen": -150.67254638671875, "logps/rejected": -155.01316833496094, "loss": 0.6833, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0718083381652832, "rewards/margins": 0.0895613357424736, "rewards/rejected": -1.1613696813583374, "step": 223 }, { "epoch": 0.29, "learning_rate": 4.960624264552858e-05, "logits/chosen": -2.6091978549957275, "logits/rejected": -2.6224098205566406, "logps/chosen": -134.08544921875, "logps/rejected": -145.00137329101562, "loss": 0.617, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6298830509185791, "rewards/margins": 0.1894720196723938, "rewards/rejected": -0.8193551301956177, "step": 224 }, { "epoch": 0.29, "learning_rate": 4.9599883175202124e-05, "logits/chosen": -2.689610004425049, "logits/rejected": -2.641515016555786, "logps/chosen": -175.27386474609375, "logps/rejected": -176.47059631347656, "loss": 0.6933, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8815383911132812, "rewards/margins": 0.05579657852649689, "rewards/rejected": -0.9373350143432617, "step": 225 }, { "epoch": 0.3, "learning_rate": 4.9593473174605974e-05, "logits/chosen": -2.673809051513672, "logits/rejected": -2.6921756267547607, "logps/chosen": -210.48333740234375, "logps/rejected": -222.2653045654297, "loss": 0.7605, "rewards/accuracies": 0.3125, "rewards/chosen": -0.9074676036834717, "rewards/margins": -0.08400504291057587, "rewards/rejected": -0.8234625458717346, "step": 226 }, { "epoch": 0.3, "learning_rate": 4.958701265690685e-05, "logits/chosen": -2.608705759048462, "logits/rejected": -2.622281074523926, "logps/chosen": -194.5504150390625, "logps/rejected": -197.58035278320312, "loss": 0.7456, "rewards/accuracies": 0.4375, "rewards/chosen": -1.1094239950180054, "rewards/margins": -0.014511600136756897, "rewards/rejected": -1.0949124097824097, "step": 227 }, { "epoch": 0.3, "learning_rate": 4.958050163537519e-05, "logits/chosen": -2.598935127258301, "logits/rejected": -2.648134231567383, "logps/chosen": -135.84756469726562, "logps/rejected": -160.54318237304688, "loss": 0.7163, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7799862623214722, "rewards/margins": 0.07278753817081451, "rewards/rejected": -0.8527737855911255, "step": 228 }, { "epoch": 0.3, "learning_rate": 4.957394012338519e-05, "logits/chosen": -2.5633938312530518, "logits/rejected": -2.528005838394165, "logps/chosen": -179.5242919921875, "logps/rejected": -175.9528350830078, "loss": 0.6776, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6330910325050354, "rewards/margins": 0.09188088774681091, "rewards/rejected": -0.7249718904495239, "step": 229 }, { "epoch": 0.3, "learning_rate": 4.956732813441477e-05, "logits/chosen": -2.681288957595825, "logits/rejected": -2.742069959640503, "logps/chosen": -157.13189697265625, "logps/rejected": -170.25006103515625, "loss": 0.6152, "rewards/accuracies": 0.75, "rewards/chosen": -0.7350885272026062, "rewards/margins": 0.18710875511169434, "rewards/rejected": -0.9221972227096558, "step": 230 }, { "epoch": 0.3, "learning_rate": 4.956066568204552e-05, "logits/chosen": -2.6132476329803467, "logits/rejected": -2.6484899520874023, "logps/chosen": -175.98236083984375, "logps/rejected": -185.96463012695312, "loss": 0.5907, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6415359973907471, "rewards/margins": 0.3225414752960205, "rewards/rejected": -0.9640775322914124, "step": 231 }, { "epoch": 0.3, "learning_rate": 4.955395277996268e-05, "logits/chosen": -2.654163122177124, "logits/rejected": -2.6380820274353027, "logps/chosen": -191.48721313476562, "logps/rejected": -160.77561950683594, "loss": 0.724, "rewards/accuracies": 0.625, "rewards/chosen": -0.868155837059021, "rewards/margins": 0.054594431072473526, "rewards/rejected": -0.9227503538131714, "step": 232 }, { "epoch": 0.3, "learning_rate": 4.954718944195512e-05, "logits/chosen": -2.6072440147399902, "logits/rejected": -2.6623966693878174, "logps/chosen": -184.31289672851562, "logps/rejected": -173.9798126220703, "loss": 0.7546, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8332484364509583, "rewards/margins": -0.08707739412784576, "rewards/rejected": -0.7461711168289185, "step": 233 }, { "epoch": 0.31, "learning_rate": 4.954037568191534e-05, "logits/chosen": -2.6110591888427734, "logits/rejected": -2.568448781967163, "logps/chosen": -222.3007049560547, "logps/rejected": -193.83935546875, "loss": 0.769, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9347423315048218, "rewards/margins": -0.09616127610206604, "rewards/rejected": -0.8385810256004333, "step": 234 }, { "epoch": 0.31, "learning_rate": 4.9533511513839384e-05, "logits/chosen": -2.7532308101654053, "logits/rejected": -2.7335729598999023, "logps/chosen": -218.836669921875, "logps/rejected": -247.48204040527344, "loss": 0.7134, "rewards/accuracies": 0.5, "rewards/chosen": -0.9885715246200562, "rewards/margins": 0.15268389880657196, "rewards/rejected": -1.1412553787231445, "step": 235 }, { "epoch": 0.31, "learning_rate": 4.9526596951826824e-05, "logits/chosen": -2.6721489429473877, "logits/rejected": -2.6597042083740234, "logps/chosen": -187.82127380371094, "logps/rejected": -174.8271026611328, "loss": 0.6244, "rewards/accuracies": 0.625, "rewards/chosen": -0.5805238485336304, "rewards/margins": 0.22618348896503448, "rewards/rejected": -0.8067073225975037, "step": 236 }, { "epoch": 0.31, "learning_rate": 4.951963201008076e-05, "logits/chosen": -2.855266571044922, "logits/rejected": -2.8631551265716553, "logps/chosen": -243.0494384765625, "logps/rejected": -228.80966186523438, "loss": 0.7247, "rewards/accuracies": 0.4375, "rewards/chosen": -0.934760332107544, "rewards/margins": 0.034967467188835144, "rewards/rejected": -0.9697277545928955, "step": 237 }, { "epoch": 0.31, "learning_rate": 4.951261670290781e-05, "logits/chosen": -2.664848566055298, "logits/rejected": -2.730018138885498, "logps/chosen": -192.22723388671875, "logps/rejected": -171.22459411621094, "loss": 0.6765, "rewards/accuracies": 0.625, "rewards/chosen": -0.6986839771270752, "rewards/margins": 0.09998993575572968, "rewards/rejected": -0.7986739277839661, "step": 238 }, { "epoch": 0.31, "learning_rate": 4.950555104471799e-05, "logits/chosen": -2.6024093627929688, "logits/rejected": -2.6131458282470703, "logps/chosen": -164.9868927001953, "logps/rejected": -153.97813415527344, "loss": 0.7123, "rewards/accuracies": 0.625, "rewards/chosen": -0.7849129438400269, "rewards/margins": 0.029222920536994934, "rewards/rejected": -0.8141359090805054, "step": 239 }, { "epoch": 0.31, "learning_rate": 4.949843505002477e-05, "logits/chosen": -2.509110689163208, "logits/rejected": -2.5836997032165527, "logps/chosen": -156.37550354003906, "logps/rejected": -177.84518432617188, "loss": 0.5906, "rewards/accuracies": 0.75, "rewards/chosen": -0.40633201599121094, "rewards/margins": 0.2858356237411499, "rewards/rejected": -0.6921676397323608, "step": 240 }, { "epoch": 0.32, "learning_rate": 4.9491268733445034e-05, "logits/chosen": -2.5929789543151855, "logits/rejected": -2.593715190887451, "logps/chosen": -159.74212646484375, "logps/rejected": -177.87425231933594, "loss": 0.6413, "rewards/accuracies": 0.625, "rewards/chosen": -0.4330030679702759, "rewards/margins": 0.15460006892681122, "rewards/rejected": -0.5876031517982483, "step": 241 }, { "epoch": 0.32, "learning_rate": 4.9484052109698984e-05, "logits/chosen": -2.581435441970825, "logits/rejected": -2.598817825317383, "logps/chosen": -169.76776123046875, "logps/rejected": -170.72422790527344, "loss": 0.6379, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5581039786338806, "rewards/margins": 0.1779794692993164, "rewards/rejected": -0.736083447933197, "step": 242 }, { "epoch": 0.32, "learning_rate": 4.947678519361021e-05, "logits/chosen": -2.5178732872009277, "logits/rejected": -2.5693912506103516, "logps/chosen": -173.69699096679688, "logps/rejected": -174.0066680908203, "loss": 0.6503, "rewards/accuracies": 0.75, "rewards/chosen": -0.5450050830841064, "rewards/margins": 0.11375146359205246, "rewards/rejected": -0.6587565541267395, "step": 243 }, { "epoch": 0.32, "learning_rate": 4.946946800010556e-05, "logits/chosen": -2.5523407459259033, "logits/rejected": -2.537888288497925, "logps/chosen": -175.92843627929688, "logps/rejected": -160.6066436767578, "loss": 0.7642, "rewards/accuracies": 0.375, "rewards/chosen": -0.7941773533821106, "rewards/margins": -0.09637541323900223, "rewards/rejected": -0.697801947593689, "step": 244 }, { "epoch": 0.32, "learning_rate": 4.946210054421518e-05, "logits/chosen": -2.688391923904419, "logits/rejected": -2.702990770339966, "logps/chosen": -222.63352966308594, "logps/rejected": -215.47323608398438, "loss": 0.6372, "rewards/accuracies": 0.75, "rewards/chosen": -0.6142177581787109, "rewards/margins": 0.2073356807231903, "rewards/rejected": -0.8215534687042236, "step": 245 }, { "epoch": 0.32, "learning_rate": 4.945468284107246e-05, "logits/chosen": -2.5451714992523193, "logits/rejected": -2.5484871864318848, "logps/chosen": -196.5067138671875, "logps/rejected": -190.59320068359375, "loss": 0.6548, "rewards/accuracies": 0.625, "rewards/chosen": -0.5240797996520996, "rewards/margins": 0.11533726006746292, "rewards/rejected": -0.6394170522689819, "step": 246 }, { "epoch": 0.32, "learning_rate": 4.944721490591401e-05, "logits/chosen": -2.472393274307251, "logits/rejected": -2.587679386138916, "logps/chosen": -165.8987274169922, "logps/rejected": -188.1234130859375, "loss": 0.5868, "rewards/accuracies": 0.875, "rewards/chosen": -0.4879373013973236, "rewards/margins": 0.2605348229408264, "rewards/rejected": -0.7484720349311829, "step": 247 }, { "epoch": 0.32, "learning_rate": 4.9439696754079595e-05, "logits/chosen": -2.800347089767456, "logits/rejected": -2.7822184562683105, "logps/chosen": -248.7989044189453, "logps/rejected": -214.1162567138672, "loss": 0.8436, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8426647782325745, "rewards/margins": -0.20171405375003815, "rewards/rejected": -0.6409507393836975, "step": 248 }, { "epoch": 0.33, "learning_rate": 4.9432128401012144e-05, "logits/chosen": -2.5632007122039795, "logits/rejected": -2.5047194957733154, "logps/chosen": -185.19070434570312, "logps/rejected": -204.64364624023438, "loss": 0.7785, "rewards/accuracies": 0.375, "rewards/chosen": -0.7219403386116028, "rewards/margins": -0.07751601189374924, "rewards/rejected": -0.6444243788719177, "step": 249 }, { "epoch": 0.33, "learning_rate": 4.9424509862257706e-05, "logits/chosen": -2.529867649078369, "logits/rejected": -2.5607314109802246, "logps/chosen": -219.9513397216797, "logps/rejected": -240.11671447753906, "loss": 0.6592, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4247688055038452, "rewards/margins": 0.10957615077495575, "rewards/rejected": -0.5343449115753174, "step": 250 }, { "epoch": 0.33, "learning_rate": 4.941684115346541e-05, "logits/chosen": -2.7805376052856445, "logits/rejected": -2.837836742401123, "logps/chosen": -177.21543884277344, "logps/rejected": -213.7078094482422, "loss": 0.6014, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5495901107788086, "rewards/margins": 0.268564373254776, "rewards/rejected": -0.8181545734405518, "step": 251 }, { "epoch": 0.33, "learning_rate": 4.940912229038745e-05, "logits/chosen": -2.56017804145813, "logits/rejected": -2.5503697395324707, "logps/chosen": -170.02735900878906, "logps/rejected": -161.25509643554688, "loss": 0.7194, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5123621225357056, "rewards/margins": -0.0268712155520916, "rewards/rejected": -0.4854908883571625, "step": 252 }, { "epoch": 0.33, "learning_rate": 4.9401353288879024e-05, "logits/chosen": -2.5448572635650635, "logits/rejected": -2.56659197807312, "logps/chosen": -169.89077758789062, "logps/rejected": -190.93545532226562, "loss": 0.6295, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4579932391643524, "rewards/margins": 0.1804841011762619, "rewards/rejected": -0.6384773254394531, "step": 253 }, { "epoch": 0.33, "learning_rate": 4.9393534164898335e-05, "logits/chosen": -2.55391526222229, "logits/rejected": -2.5868449211120605, "logps/chosen": -158.18197631835938, "logps/rejected": -197.00271606445312, "loss": 0.6476, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6932105422019958, "rewards/margins": 0.13315898180007935, "rewards/rejected": -0.8263695240020752, "step": 254 }, { "epoch": 0.33, "learning_rate": 4.9385664934506526e-05, "logits/chosen": -2.554259777069092, "logits/rejected": -2.684239625930786, "logps/chosen": -159.8101348876953, "logps/rejected": -183.30532836914062, "loss": 0.6516, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5770647525787354, "rewards/margins": 0.14322030544281006, "rewards/rejected": -0.7202850580215454, "step": 255 }, { "epoch": 0.34, "learning_rate": 4.937774561386768e-05, "logits/chosen": -2.5128979682922363, "logits/rejected": -2.6388025283813477, "logps/chosen": -174.58401489257812, "logps/rejected": -184.96910095214844, "loss": 0.577, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5345829129219055, "rewards/margins": 0.3504784107208252, "rewards/rejected": -0.8850612640380859, "step": 256 }, { "epoch": 0.34, "learning_rate": 4.936977621924875e-05, "logits/chosen": -2.6937482357025146, "logits/rejected": -2.650275707244873, "logps/chosen": -165.79302978515625, "logps/rejected": -177.26170349121094, "loss": 0.6382, "rewards/accuracies": 0.5625, "rewards/chosen": -0.614943265914917, "rewards/margins": 0.14949731528759003, "rewards/rejected": -0.7644405961036682, "step": 257 }, { "epoch": 0.34, "learning_rate": 4.9361756767019564e-05, "logits/chosen": -2.5641212463378906, "logits/rejected": -2.6175105571746826, "logps/chosen": -182.44837951660156, "logps/rejected": -225.40805053710938, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": -0.6991927027702332, "rewards/margins": 0.0908452570438385, "rewards/rejected": -0.790037989616394, "step": 258 }, { "epoch": 0.34, "learning_rate": 4.935368727365276e-05, "logits/chosen": -2.6357033252716064, "logits/rejected": -2.57110857963562, "logps/chosen": -182.70291137695312, "logps/rejected": -223.89857482910156, "loss": 0.6278, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7485102415084839, "rewards/margins": 0.1697012186050415, "rewards/rejected": -0.9182114601135254, "step": 259 }, { "epoch": 0.34, "learning_rate": 4.934556775572377e-05, "logits/chosen": -2.6168265342712402, "logits/rejected": -2.6644468307495117, "logps/chosen": -165.17758178710938, "logps/rejected": -188.50119018554688, "loss": 0.7554, "rewards/accuracies": 0.25, "rewards/chosen": -0.5862306356430054, "rewards/margins": -0.08211595565080643, "rewards/rejected": -0.5041146874427795, "step": 260 }, { "epoch": 0.34, "learning_rate": 4.9337398229910784e-05, "logits/chosen": -2.595906972885132, "logits/rejected": -2.594231128692627, "logps/chosen": -188.159912109375, "logps/rejected": -176.15074157714844, "loss": 0.6936, "rewards/accuracies": 0.5, "rewards/chosen": -0.6705946922302246, "rewards/margins": 0.03840280696749687, "rewards/rejected": -0.7089974284172058, "step": 261 }, { "epoch": 0.34, "learning_rate": 4.932917871299471e-05, "logits/chosen": -2.672065496444702, "logits/rejected": -2.6840691566467285, "logps/chosen": -173.274169921875, "logps/rejected": -183.5944061279297, "loss": 0.6154, "rewards/accuracies": 0.75, "rewards/chosen": -0.48323020339012146, "rewards/margins": 0.2292642742395401, "rewards/rejected": -0.7124944925308228, "step": 262 }, { "epoch": 0.34, "learning_rate": 4.9320909221859134e-05, "logits/chosen": -2.660583972930908, "logits/rejected": -2.7142574787139893, "logps/chosen": -192.77145385742188, "logps/rejected": -185.00450134277344, "loss": 0.6823, "rewards/accuracies": 0.625, "rewards/chosen": -0.7117425799369812, "rewards/margins": 0.10454593598842621, "rewards/rejected": -0.8162885308265686, "step": 263 }, { "epoch": 0.35, "learning_rate": 4.9312589773490304e-05, "logits/chosen": -2.5676193237304688, "logits/rejected": -2.510430335998535, "logps/chosen": -176.65432739257812, "logps/rejected": -154.3079833984375, "loss": 0.7678, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8498928546905518, "rewards/margins": -0.05464668944478035, "rewards/rejected": -0.7952461242675781, "step": 264 }, { "epoch": 0.35, "learning_rate": 4.930422038497708e-05, "logits/chosen": -2.5558042526245117, "logits/rejected": -2.672940492630005, "logps/chosen": -173.69500732421875, "logps/rejected": -180.78118896484375, "loss": 0.5704, "rewards/accuracies": 0.625, "rewards/chosen": -0.5608630776405334, "rewards/margins": 0.38767609000205994, "rewards/rejected": -0.948539137840271, "step": 265 }, { "epoch": 0.35, "learning_rate": 4.92958010735109e-05, "logits/chosen": -2.508685350418091, "logits/rejected": -2.4861252307891846, "logps/chosen": -196.0287628173828, "logps/rejected": -212.8455810546875, "loss": 0.6453, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6239847540855408, "rewards/margins": 0.18039949238300323, "rewards/rejected": -0.8043842315673828, "step": 266 }, { "epoch": 0.35, "learning_rate": 4.928733185638575e-05, "logits/chosen": -2.641526222229004, "logits/rejected": -2.630765199661255, "logps/chosen": -205.0409698486328, "logps/rejected": -201.60269165039062, "loss": 0.651, "rewards/accuracies": 0.75, "rewards/chosen": -0.7301749587059021, "rewards/margins": 0.1289985626935959, "rewards/rejected": -0.859173595905304, "step": 267 }, { "epoch": 0.35, "learning_rate": 4.927881275099815e-05, "logits/chosen": -2.5749480724334717, "logits/rejected": -2.5817878246307373, "logps/chosen": -187.1511688232422, "logps/rejected": -236.642822265625, "loss": 0.6596, "rewards/accuracies": 0.625, "rewards/chosen": -0.6212616562843323, "rewards/margins": 0.13300594687461853, "rewards/rejected": -0.7542675733566284, "step": 268 }, { "epoch": 0.35, "learning_rate": 4.927024377484705e-05, "logits/chosen": -2.6451222896575928, "logits/rejected": -2.700425863265991, "logps/chosen": -176.2154083251953, "logps/rejected": -211.41744995117188, "loss": 0.7046, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8719490766525269, "rewards/margins": 0.05133984610438347, "rewards/rejected": -0.9232889413833618, "step": 269 }, { "epoch": 0.35, "learning_rate": 4.9261624945533855e-05, "logits/chosen": -2.5780887603759766, "logits/rejected": -2.669583320617676, "logps/chosen": -181.3977813720703, "logps/rejected": -242.43626403808594, "loss": 0.6873, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6802850365638733, "rewards/margins": 0.029887204989790916, "rewards/rejected": -0.7101722955703735, "step": 270 }, { "epoch": 0.35, "learning_rate": 4.925295628076241e-05, "logits/chosen": -2.6090972423553467, "logits/rejected": -2.683279275894165, "logps/chosen": -192.7779541015625, "logps/rejected": -241.75607299804688, "loss": 0.6072, "rewards/accuracies": 0.625, "rewards/chosen": -0.7047219276428223, "rewards/margins": 0.2544190287590027, "rewards/rejected": -0.9591410160064697, "step": 271 }, { "epoch": 0.36, "learning_rate": 4.9244237798338866e-05, "logits/chosen": -2.7260890007019043, "logits/rejected": -2.7258718013763428, "logps/chosen": -206.13421630859375, "logps/rejected": -211.11663818359375, "loss": 0.7085, "rewards/accuracies": 0.5625, "rewards/chosen": -0.855944037437439, "rewards/margins": 0.07277999073266983, "rewards/rejected": -0.9287241101264954, "step": 272 }, { "epoch": 0.36, "learning_rate": 4.923546951617175e-05, "logits/chosen": -2.648552417755127, "logits/rejected": -2.5934343338012695, "logps/chosen": -170.73721313476562, "logps/rejected": -186.487548828125, "loss": 0.6374, "rewards/accuracies": 0.625, "rewards/chosen": -0.7491826415061951, "rewards/margins": 0.1971735656261444, "rewards/rejected": -0.9463562369346619, "step": 273 }, { "epoch": 0.36, "learning_rate": 4.922665145227187e-05, "logits/chosen": -2.5815610885620117, "logits/rejected": -2.5234570503234863, "logps/chosen": -153.86798095703125, "logps/rejected": -140.2138671875, "loss": 0.8232, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9449439644813538, "rewards/margins": -0.13928692042827606, "rewards/rejected": -0.8056570291519165, "step": 274 }, { "epoch": 0.36, "learning_rate": 4.9217783624752266e-05, "logits/chosen": -2.4257960319519043, "logits/rejected": -2.481285333633423, "logps/chosen": -129.22506713867188, "logps/rejected": -133.507080078125, "loss": 0.6863, "rewards/accuracies": 0.5, "rewards/chosen": -0.7729750275611877, "rewards/margins": 0.07025709748268127, "rewards/rejected": -0.8432320356369019, "step": 275 }, { "epoch": 0.36, "learning_rate": 4.920886605182823e-05, "logits/chosen": -2.8374218940734863, "logits/rejected": -2.840282440185547, "logps/chosen": -183.80465698242188, "logps/rejected": -185.8097381591797, "loss": 0.6878, "rewards/accuracies": 0.5, "rewards/chosen": -0.8535721898078918, "rewards/margins": 0.04765475541353226, "rewards/rejected": -0.9012269377708435, "step": 276 }, { "epoch": 0.36, "learning_rate": 4.919989875181722e-05, "logits/chosen": -2.7011358737945557, "logits/rejected": -2.758044719696045, "logps/chosen": -176.79345703125, "logps/rejected": -174.94239807128906, "loss": 0.6994, "rewards/accuracies": 0.625, "rewards/chosen": -1.159913182258606, "rewards/margins": 0.09292294830083847, "rewards/rejected": -1.2528361082077026, "step": 277 }, { "epoch": 0.36, "learning_rate": 4.919088174313884e-05, "logits/chosen": -2.614689826965332, "logits/rejected": -2.645404577255249, "logps/chosen": -134.0303955078125, "logps/rejected": -162.16595458984375, "loss": 0.6113, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6662919521331787, "rewards/margins": 0.26484376192092896, "rewards/rejected": -0.9311355948448181, "step": 278 }, { "epoch": 0.37, "learning_rate": 4.91818150443148e-05, "logits/chosen": -2.6832685470581055, "logits/rejected": -2.670450448989868, "logps/chosen": -196.30052185058594, "logps/rejected": -179.43482971191406, "loss": 0.6394, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9087074995040894, "rewards/margins": 0.1679982841014862, "rewards/rejected": -1.076705813407898, "step": 279 }, { "epoch": 0.37, "learning_rate": 4.917269867396886e-05, "logits/chosen": -2.8207449913024902, "logits/rejected": -2.773184061050415, "logps/chosen": -198.20509338378906, "logps/rejected": -184.33151245117188, "loss": 0.7475, "rewards/accuracies": 0.375, "rewards/chosen": -0.9387542009353638, "rewards/margins": 0.003928817808628082, "rewards/rejected": -0.9426830410957336, "step": 280 }, { "epoch": 0.37, "learning_rate": 4.916353265082686e-05, "logits/chosen": -2.75034236907959, "logits/rejected": -2.7130117416381836, "logps/chosen": -187.1002197265625, "logps/rejected": -193.2103271484375, "loss": 0.8817, "rewards/accuracies": 0.25, "rewards/chosen": -1.0758837461471558, "rewards/margins": -0.2836476266384125, "rewards/rejected": -0.7922362089157104, "step": 281 }, { "epoch": 0.37, "learning_rate": 4.9154316993716565e-05, "logits/chosen": -2.7884602546691895, "logits/rejected": -2.889566421508789, "logps/chosen": -173.12225341796875, "logps/rejected": -181.60842895507812, "loss": 0.6636, "rewards/accuracies": 0.4375, "rewards/chosen": -1.0141631364822388, "rewards/margins": 0.16198423504829407, "rewards/rejected": -1.1761474609375, "step": 282 }, { "epoch": 0.37, "learning_rate": 4.9145051721567734e-05, "logits/chosen": -2.711033344268799, "logits/rejected": -2.731414318084717, "logps/chosen": -190.81759643554688, "logps/rejected": -214.6211700439453, "loss": 0.5886, "rewards/accuracies": 0.75, "rewards/chosen": -0.9142743945121765, "rewards/margins": 0.34260398149490356, "rewards/rejected": -1.25687837600708, "step": 283 }, { "epoch": 0.37, "learning_rate": 4.913573685341205e-05, "logits/chosen": -2.534449338912964, "logits/rejected": -2.6240017414093018, "logps/chosen": -158.4725341796875, "logps/rejected": -144.93075561523438, "loss": 0.6633, "rewards/accuracies": 0.5, "rewards/chosen": -0.8281882405281067, "rewards/margins": 0.08525969088077545, "rewards/rejected": -0.9134478569030762, "step": 284 }, { "epoch": 0.37, "learning_rate": 4.9126372408383025e-05, "logits/chosen": -2.8126165866851807, "logits/rejected": -2.9268059730529785, "logps/chosen": -165.80160522460938, "logps/rejected": -195.13014221191406, "loss": 0.6719, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0856614112854004, "rewards/margins": 0.1156027615070343, "rewards/rejected": -1.2012642621994019, "step": 285 }, { "epoch": 0.37, "learning_rate": 4.911695840571605e-05, "logits/chosen": -2.8474719524383545, "logits/rejected": -2.870950698852539, "logps/chosen": -185.34619140625, "logps/rejected": -204.24951171875, "loss": 0.6981, "rewards/accuracies": 0.5, "rewards/chosen": -0.9124755859375, "rewards/margins": 0.032730571925640106, "rewards/rejected": -0.9452061057090759, "step": 286 }, { "epoch": 0.38, "learning_rate": 4.910749486474828e-05, "logits/chosen": -2.7202816009521484, "logits/rejected": -2.7738454341888428, "logps/chosen": -183.0021514892578, "logps/rejected": -176.14559936523438, "loss": 0.7423, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8500471711158752, "rewards/margins": -0.009690776467323303, "rewards/rejected": -0.8403564691543579, "step": 287 }, { "epoch": 0.38, "learning_rate": 4.909798180491865e-05, "logits/chosen": -2.8064842224121094, "logits/rejected": -2.8379313945770264, "logps/chosen": -181.36424255371094, "logps/rejected": -186.806884765625, "loss": 0.7156, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9925443530082703, "rewards/margins": 0.02362808585166931, "rewards/rejected": -1.0161724090576172, "step": 288 }, { "epoch": 0.38, "learning_rate": 4.9088419245767803e-05, "logits/chosen": -2.591299057006836, "logits/rejected": -2.6505491733551025, "logps/chosen": -165.84970092773438, "logps/rejected": -189.2133331298828, "loss": 0.6588, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6631417870521545, "rewards/margins": 0.1258070468902588, "rewards/rejected": -0.7889488339424133, "step": 289 }, { "epoch": 0.38, "learning_rate": 4.907880720693804e-05, "logits/chosen": -2.9960107803344727, "logits/rejected": -2.9374802112579346, "logps/chosen": -213.4750518798828, "logps/rejected": -237.82672119140625, "loss": 0.656, "rewards/accuracies": 0.5, "rewards/chosen": -0.8233738541603088, "rewards/margins": 0.1019565686583519, "rewards/rejected": -0.9253304600715637, "step": 290 }, { "epoch": 0.38, "learning_rate": 4.9069145708173324e-05, "logits/chosen": -2.6092634201049805, "logits/rejected": -2.6358261108398438, "logps/chosen": -197.22938537597656, "logps/rejected": -190.7332000732422, "loss": 0.613, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7702199220657349, "rewards/margins": 0.23700743913650513, "rewards/rejected": -1.0072274208068848, "step": 291 }, { "epoch": 0.38, "learning_rate": 4.9059434769319205e-05, "logits/chosen": -2.731299638748169, "logits/rejected": -2.8226795196533203, "logps/chosen": -202.76451110839844, "logps/rejected": -244.38844299316406, "loss": 0.589, "rewards/accuracies": 0.75, "rewards/chosen": -0.6375839114189148, "rewards/margins": 0.2866172790527344, "rewards/rejected": -0.9242011904716492, "step": 292 }, { "epoch": 0.38, "learning_rate": 4.904967441032278e-05, "logits/chosen": -2.478205680847168, "logits/rejected": -2.541795253753662, "logps/chosen": -195.00408935546875, "logps/rejected": -225.96731567382812, "loss": 0.6568, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6428064107894897, "rewards/margins": 0.17480693757534027, "rewards/rejected": -0.8176133632659912, "step": 293 }, { "epoch": 0.38, "learning_rate": 4.903986465123266e-05, "logits/chosen": -2.692394733428955, "logits/rejected": -2.7363274097442627, "logps/chosen": -167.2650146484375, "logps/rejected": -220.62228393554688, "loss": 0.7124, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8301478624343872, "rewards/margins": 0.07167816907167435, "rewards/rejected": -0.901826024055481, "step": 294 }, { "epoch": 0.39, "learning_rate": 4.903000551219894e-05, "logits/chosen": -2.782505989074707, "logits/rejected": -2.81825590133667, "logps/chosen": -153.23898315429688, "logps/rejected": -157.7719268798828, "loss": 0.7488, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8630144596099854, "rewards/margins": -0.03182988613843918, "rewards/rejected": -0.8311845660209656, "step": 295 }, { "epoch": 0.39, "learning_rate": 4.902009701347313e-05, "logits/chosen": -2.7017691135406494, "logits/rejected": -2.751739978790283, "logps/chosen": -199.3386688232422, "logps/rejected": -189.38465881347656, "loss": 0.7063, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7444071769714355, "rewards/margins": 0.05856693163514137, "rewards/rejected": -0.8029740452766418, "step": 296 }, { "epoch": 0.39, "learning_rate": 4.901013917540814e-05, "logits/chosen": -2.62956166267395, "logits/rejected": -2.65138578414917, "logps/chosen": -209.6748046875, "logps/rejected": -200.0463409423828, "loss": 0.6986, "rewards/accuracies": 0.5, "rewards/chosen": -0.778890073299408, "rewards/margins": 0.03998234495520592, "rewards/rejected": -0.8188724517822266, "step": 297 }, { "epoch": 0.39, "learning_rate": 4.900013201845821e-05, "logits/chosen": -2.604013204574585, "logits/rejected": -2.6575911045074463, "logps/chosen": -192.33749389648438, "logps/rejected": -196.74574279785156, "loss": 0.7202, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7086251378059387, "rewards/margins": 0.026372164487838745, "rewards/rejected": -0.7349973917007446, "step": 298 }, { "epoch": 0.39, "learning_rate": 4.899007556317893e-05, "logits/chosen": -2.672982931137085, "logits/rejected": -2.7704596519470215, "logps/chosen": -232.0260009765625, "logps/rejected": -230.7442169189453, "loss": 0.666, "rewards/accuracies": 0.625, "rewards/chosen": -0.703597366809845, "rewards/margins": 0.1020139679312706, "rewards/rejected": -0.8056113123893738, "step": 299 }, { "epoch": 0.39, "learning_rate": 4.8979969830227086e-05, "logits/chosen": -2.7685508728027344, "logits/rejected": -2.798677921295166, "logps/chosen": -170.82254028320312, "logps/rejected": -211.26219177246094, "loss": 0.6242, "rewards/accuracies": 0.75, "rewards/chosen": -0.6501718759536743, "rewards/margins": 0.23585857450962067, "rewards/rejected": -0.8860303163528442, "step": 300 }, { "epoch": 0.39, "learning_rate": 4.896981484036074e-05, "logits/chosen": -2.742246150970459, "logits/rejected": -2.7133634090423584, "logps/chosen": -188.86024475097656, "logps/rejected": -199.79791259765625, "loss": 0.5836, "rewards/accuracies": 0.625, "rewards/chosen": -0.5299299955368042, "rewards/margins": 0.2819333076477051, "rewards/rejected": -0.8118634223937988, "step": 301 }, { "epoch": 0.4, "learning_rate": 4.895961061443911e-05, "logits/chosen": -2.726637840270996, "logits/rejected": -2.728475332260132, "logps/chosen": -204.32278442382812, "logps/rejected": -233.36709594726562, "loss": 0.8344, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8258933424949646, "rewards/margins": -0.08616884052753448, "rewards/rejected": -0.7397244572639465, "step": 302 }, { "epoch": 0.4, "learning_rate": 4.894935717342255e-05, "logits/chosen": -2.757063865661621, "logits/rejected": -2.7701942920684814, "logps/chosen": -192.92251586914062, "logps/rejected": -192.76185607910156, "loss": 0.7007, "rewards/accuracies": 0.5, "rewards/chosen": -0.71357262134552, "rewards/margins": 0.04112683981657028, "rewards/rejected": -0.7546994686126709, "step": 303 }, { "epoch": 0.4, "learning_rate": 4.8939054538372496e-05, "logits/chosen": -2.6160852909088135, "logits/rejected": -2.6638543605804443, "logps/chosen": -153.6888427734375, "logps/rejected": -204.19139099121094, "loss": 0.6179, "rewards/accuracies": 0.625, "rewards/chosen": -0.5954157710075378, "rewards/margins": 0.2527124583721161, "rewards/rejected": -0.8481282591819763, "step": 304 }, { "epoch": 0.4, "learning_rate": 4.8928702730451456e-05, "logits/chosen": -2.635458469390869, "logits/rejected": -2.740834951400757, "logps/chosen": -229.58181762695312, "logps/rejected": -209.87832641601562, "loss": 0.7217, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7451784610748291, "rewards/margins": 0.019871072843670845, "rewards/rejected": -0.7650495171546936, "step": 305 }, { "epoch": 0.4, "learning_rate": 4.891830177092294e-05, "logits/chosen": -2.515532970428467, "logits/rejected": -2.5850772857666016, "logps/chosen": -173.58749389648438, "logps/rejected": -197.440185546875, "loss": 0.6521, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6022768616676331, "rewards/margins": 0.11426748335361481, "rewards/rejected": -0.7165443301200867, "step": 306 }, { "epoch": 0.4, "learning_rate": 4.8907851681151396e-05, "logits/chosen": -2.708148956298828, "logits/rejected": -2.735069751739502, "logps/chosen": -146.8754425048828, "logps/rejected": -182.95477294921875, "loss": 0.6006, "rewards/accuracies": 0.75, "rewards/chosen": -0.35234156250953674, "rewards/margins": 0.24402308464050293, "rewards/rejected": -0.5963646173477173, "step": 307 }, { "epoch": 0.4, "learning_rate": 4.889735248260221e-05, "logits/chosen": -2.657132625579834, "logits/rejected": -2.7396936416625977, "logps/chosen": -172.76268005371094, "logps/rejected": -185.73153686523438, "loss": 0.6844, "rewards/accuracies": 0.625, "rewards/chosen": -0.7841547727584839, "rewards/margins": 0.08935101330280304, "rewards/rejected": -0.8735058307647705, "step": 308 }, { "epoch": 0.4, "learning_rate": 4.8886804196841626e-05, "logits/chosen": -2.6601943969726562, "logits/rejected": -2.6764605045318604, "logps/chosen": -182.19937133789062, "logps/rejected": -196.57550048828125, "loss": 0.6327, "rewards/accuracies": 0.625, "rewards/chosen": -0.7912774085998535, "rewards/margins": 0.19412587583065033, "rewards/rejected": -0.9854032397270203, "step": 309 }, { "epoch": 0.41, "learning_rate": 4.887620684553674e-05, "logits/chosen": -2.5685248374938965, "logits/rejected": -2.5472164154052734, "logps/chosen": -161.2342529296875, "logps/rejected": -201.6751708984375, "loss": 0.683, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6504772901535034, "rewards/margins": 0.089483842253685, "rewards/rejected": -0.7399611473083496, "step": 310 }, { "epoch": 0.41, "learning_rate": 4.886556045045542e-05, "logits/chosen": -2.7824556827545166, "logits/rejected": -2.772510528564453, "logps/chosen": -183.0452880859375, "logps/rejected": -197.40408325195312, "loss": 0.8643, "rewards/accuracies": 0.375, "rewards/chosen": -1.0302786827087402, "rewards/margins": -0.2259717881679535, "rewards/rejected": -0.8043068647384644, "step": 311 }, { "epoch": 0.41, "learning_rate": 4.8854865033466275e-05, "logits/chosen": -2.457235097885132, "logits/rejected": -2.5225629806518555, "logps/chosen": -137.17259216308594, "logps/rejected": -151.13467407226562, "loss": 0.5958, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6421889662742615, "rewards/margins": 0.3034355044364929, "rewards/rejected": -0.9456245303153992, "step": 312 }, { "epoch": 0.41, "learning_rate": 4.88441206165386e-05, "logits/chosen": -2.7572522163391113, "logits/rejected": -2.7597944736480713, "logps/chosen": -194.38626098632812, "logps/rejected": -208.2905731201172, "loss": 0.848, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8563791513442993, "rewards/margins": -0.21535280346870422, "rewards/rejected": -0.6410263180732727, "step": 313 }, { "epoch": 0.41, "learning_rate": 4.8833327221742356e-05, "logits/chosen": -2.589763641357422, "logits/rejected": -2.5469565391540527, "logps/chosen": -159.60533142089844, "logps/rejected": -153.5530548095703, "loss": 0.6174, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6452800035476685, "rewards/margins": 0.2377607673406601, "rewards/rejected": -0.8830407857894897, "step": 314 }, { "epoch": 0.41, "learning_rate": 4.88224848712481e-05, "logits/chosen": -2.6565144062042236, "logits/rejected": -2.694272994995117, "logps/chosen": -175.2017059326172, "logps/rejected": -171.8043975830078, "loss": 0.6785, "rewards/accuracies": 0.5625, "rewards/chosen": -0.514467179775238, "rewards/margins": 0.11190642416477203, "rewards/rejected": -0.6263736486434937, "step": 315 }, { "epoch": 0.41, "learning_rate": 4.881159358732694e-05, "logits/chosen": -2.5067477226257324, "logits/rejected": -2.551682710647583, "logps/chosen": -187.95533752441406, "logps/rejected": -237.65907287597656, "loss": 0.6805, "rewards/accuracies": 0.625, "rewards/chosen": -0.6460933685302734, "rewards/margins": 0.15910674631595612, "rewards/rejected": -0.8052000403404236, "step": 316 }, { "epoch": 0.41, "learning_rate": 4.8800653392350526e-05, "logits/chosen": -2.4550201892852783, "logits/rejected": -2.519341468811035, "logps/chosen": -156.3563690185547, "logps/rejected": -171.7753143310547, "loss": 0.7012, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7904012799263, "rewards/margins": 0.05794687569141388, "rewards/rejected": -0.8483481407165527, "step": 317 }, { "epoch": 0.42, "learning_rate": 4.8789664308790936e-05, "logits/chosen": -2.7174458503723145, "logits/rejected": -2.6885735988616943, "logps/chosen": -172.3370361328125, "logps/rejected": -164.46519470214844, "loss": 0.6861, "rewards/accuracies": 0.375, "rewards/chosen": -0.6009785532951355, "rewards/margins": 0.08145736157894135, "rewards/rejected": -0.682435929775238, "step": 318 }, { "epoch": 0.42, "learning_rate": 4.8778626359220715e-05, "logits/chosen": -2.731224536895752, "logits/rejected": -2.6857399940490723, "logps/chosen": -185.449951171875, "logps/rejected": -206.4256591796875, "loss": 0.7672, "rewards/accuracies": 0.5, "rewards/chosen": -0.7531261444091797, "rewards/margins": -0.07965384423732758, "rewards/rejected": -0.6734722852706909, "step": 319 }, { "epoch": 0.42, "learning_rate": 4.8767539566312734e-05, "logits/chosen": -2.5604355335235596, "logits/rejected": -2.704071044921875, "logps/chosen": -174.9366912841797, "logps/rejected": -202.98768615722656, "loss": 0.6137, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6721182465553284, "rewards/margins": 0.21909648180007935, "rewards/rejected": -0.8912147283554077, "step": 320 }, { "epoch": 0.42, "learning_rate": 4.875640395284023e-05, "logits/chosen": -2.758918046951294, "logits/rejected": -2.7820916175842285, "logps/chosen": -195.2515869140625, "logps/rejected": -231.6067352294922, "loss": 0.5285, "rewards/accuracies": 0.8125, "rewards/chosen": -0.38608187437057495, "rewards/margins": 0.4270917773246765, "rewards/rejected": -0.8131736516952515, "step": 321 }, { "epoch": 0.42, "learning_rate": 4.874521954167671e-05, "logits/chosen": -2.7890138626098633, "logits/rejected": -2.8068411350250244, "logps/chosen": -209.79307556152344, "logps/rejected": -207.8020477294922, "loss": 0.6367, "rewards/accuracies": 0.625, "rewards/chosen": -0.6039459109306335, "rewards/margins": 0.1874314397573471, "rewards/rejected": -0.7913773059844971, "step": 322 }, { "epoch": 0.42, "learning_rate": 4.8733986355795905e-05, "logits/chosen": -2.675286054611206, "logits/rejected": -2.699337959289551, "logps/chosen": -233.40780639648438, "logps/rejected": -208.18724060058594, "loss": 0.6598, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6822580695152283, "rewards/margins": 0.1497945785522461, "rewards/rejected": -0.8320526480674744, "step": 323 }, { "epoch": 0.42, "learning_rate": 4.8722704418271745e-05, "logits/chosen": -2.5140862464904785, "logits/rejected": -2.632192850112915, "logps/chosen": -176.1000213623047, "logps/rejected": -196.43846130371094, "loss": 0.7119, "rewards/accuracies": 0.5, "rewards/chosen": -0.7464176416397095, "rewards/margins": 0.01839565485715866, "rewards/rejected": -0.7648133039474487, "step": 324 }, { "epoch": 0.43, "learning_rate": 4.871137375227829e-05, "logits/chosen": -2.6084189414978027, "logits/rejected": -2.6140480041503906, "logps/chosen": -188.7124481201172, "logps/rejected": -177.5293426513672, "loss": 0.748, "rewards/accuracies": 0.4375, "rewards/chosen": -0.618392288684845, "rewards/margins": 0.016466360539197922, "rewards/rejected": -0.6348586678504944, "step": 325 }, { "epoch": 0.43, "learning_rate": 4.869999438108971e-05, "logits/chosen": -2.6994099617004395, "logits/rejected": -2.743457555770874, "logps/chosen": -182.1778564453125, "logps/rejected": -194.54884338378906, "loss": 0.6603, "rewards/accuracies": 0.75, "rewards/chosen": -0.5676161050796509, "rewards/margins": 0.1599595993757248, "rewards/rejected": -0.7275756597518921, "step": 326 }, { "epoch": 0.43, "learning_rate": 4.8688566328080215e-05, "logits/chosen": -2.5730295181274414, "logits/rejected": -2.573648452758789, "logps/chosen": -199.49908447265625, "logps/rejected": -235.17568969726562, "loss": 0.5873, "rewards/accuracies": 0.6875, "rewards/chosen": -0.379108190536499, "rewards/margins": 0.30178508162498474, "rewards/rejected": -0.6808933019638062, "step": 327 }, { "epoch": 0.43, "learning_rate": 4.867708961672399e-05, "logits/chosen": -2.5466060638427734, "logits/rejected": -2.661322593688965, "logps/chosen": -183.09585571289062, "logps/rejected": -185.6378936767578, "loss": 0.666, "rewards/accuracies": 0.625, "rewards/chosen": -0.49351951479911804, "rewards/margins": 0.11638712882995605, "rewards/rejected": -0.6099066734313965, "step": 328 }, { "epoch": 0.43, "learning_rate": 4.866556427059519e-05, "logits/chosen": -2.7101516723632812, "logits/rejected": -2.644563913345337, "logps/chosen": -187.73348999023438, "logps/rejected": -169.40293884277344, "loss": 0.7781, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8404239416122437, "rewards/margins": -0.026825089007616043, "rewards/rejected": -0.8135988116264343, "step": 329 }, { "epoch": 0.43, "learning_rate": 4.865399031336787e-05, "logits/chosen": -2.4861361980438232, "logits/rejected": -2.565809726715088, "logps/chosen": -146.98423767089844, "logps/rejected": -163.1490478515625, "loss": 0.7333, "rewards/accuracies": 0.5, "rewards/chosen": -0.5940142869949341, "rewards/margins": 0.03410058468580246, "rewards/rejected": -0.6281149387359619, "step": 330 }, { "epoch": 0.43, "learning_rate": 4.8642367768815936e-05, "logits/chosen": -2.6412410736083984, "logits/rejected": -2.747821807861328, "logps/chosen": -174.8326873779297, "logps/rejected": -219.83612060546875, "loss": 0.5947, "rewards/accuracies": 0.75, "rewards/chosen": -0.4866630434989929, "rewards/margins": 0.2575409412384033, "rewards/rejected": -0.744204044342041, "step": 331 }, { "epoch": 0.43, "learning_rate": 4.863069666081307e-05, "logits/chosen": -2.6463019847869873, "logits/rejected": -2.796405553817749, "logps/chosen": -164.44869995117188, "logps/rejected": -217.47230529785156, "loss": 0.6092, "rewards/accuracies": 0.625, "rewards/chosen": -0.5829634070396423, "rewards/margins": 0.26363080739974976, "rewards/rejected": -0.8465942144393921, "step": 332 }, { "epoch": 0.44, "learning_rate": 4.861897701333274e-05, "logits/chosen": -2.6675515174865723, "logits/rejected": -2.6761202812194824, "logps/chosen": -187.8043212890625, "logps/rejected": -175.5827178955078, "loss": 0.8043, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8770172595977783, "rewards/margins": -0.06917458772659302, "rewards/rejected": -0.8078427314758301, "step": 333 }, { "epoch": 0.44, "learning_rate": 4.86072088504481e-05, "logits/chosen": -2.6193923950195312, "logits/rejected": -2.652172088623047, "logps/chosen": -180.9799346923828, "logps/rejected": -197.08245849609375, "loss": 0.756, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8450060486793518, "rewards/margins": -0.008023982867598534, "rewards/rejected": -0.8369821310043335, "step": 334 }, { "epoch": 0.44, "learning_rate": 4.859539219633199e-05, "logits/chosen": -2.4201056957244873, "logits/rejected": -2.547577381134033, "logps/chosen": -142.2472381591797, "logps/rejected": -171.77197265625, "loss": 0.5808, "rewards/accuracies": 0.75, "rewards/chosen": -0.25414225459098816, "rewards/margins": 0.2882387042045593, "rewards/rejected": -0.5423809289932251, "step": 335 }, { "epoch": 0.44, "learning_rate": 4.8583527075256804e-05, "logits/chosen": -2.6075916290283203, "logits/rejected": -2.6171302795410156, "logps/chosen": -185.60699462890625, "logps/rejected": -199.539306640625, "loss": 0.6039, "rewards/accuracies": 0.625, "rewards/chosen": -0.6799944043159485, "rewards/margins": 0.21594738960266113, "rewards/rejected": -0.8959417939186096, "step": 336 }, { "epoch": 0.44, "learning_rate": 4.857161351159454e-05, "logits/chosen": -2.7377195358276367, "logits/rejected": -2.722515821456909, "logps/chosen": -225.74755859375, "logps/rejected": -224.8936004638672, "loss": 0.6793, "rewards/accuracies": 0.75, "rewards/chosen": -0.8174278140068054, "rewards/margins": 0.10962995886802673, "rewards/rejected": -0.9270578026771545, "step": 337 }, { "epoch": 0.44, "learning_rate": 4.8559651529816664e-05, "logits/chosen": -2.597797393798828, "logits/rejected": -2.6779723167419434, "logps/chosen": -159.58380126953125, "logps/rejected": -185.880615234375, "loss": 0.679, "rewards/accuracies": 0.5, "rewards/chosen": -0.7312408089637756, "rewards/margins": 0.1112150177359581, "rewards/rejected": -0.8424558639526367, "step": 338 }, { "epoch": 0.44, "learning_rate": 4.854764115449411e-05, "logits/chosen": -2.7188682556152344, "logits/rejected": -2.691462993621826, "logps/chosen": -143.06845092773438, "logps/rejected": -142.25157165527344, "loss": 0.6675, "rewards/accuracies": 0.5625, "rewards/chosen": -0.675825297832489, "rewards/margins": 0.130048006772995, "rewards/rejected": -0.8058732748031616, "step": 339 }, { "epoch": 0.45, "learning_rate": 4.853558241029723e-05, "logits/chosen": -2.629163980484009, "logits/rejected": -2.5702826976776123, "logps/chosen": -224.11196899414062, "logps/rejected": -171.19097900390625, "loss": 0.718, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6522189378738403, "rewards/margins": 0.046554647386074066, "rewards/rejected": -0.6987735629081726, "step": 340 }, { "epoch": 0.45, "learning_rate": 4.8523475321995715e-05, "logits/chosen": -2.7033562660217285, "logits/rejected": -2.5383901596069336, "logps/chosen": -181.98219299316406, "logps/rejected": -172.13865661621094, "loss": 0.719, "rewards/accuracies": 0.625, "rewards/chosen": -0.5484719276428223, "rewards/margins": 0.0074146464467048645, "rewards/rejected": -0.5558865666389465, "step": 341 }, { "epoch": 0.45, "learning_rate": 4.8511319914458555e-05, "logits/chosen": -2.5836706161499023, "logits/rejected": -2.5528974533081055, "logps/chosen": -223.9678497314453, "logps/rejected": -211.67184448242188, "loss": 0.7904, "rewards/accuracies": 0.5, "rewards/chosen": -0.7669371366500854, "rewards/margins": -0.11547183990478516, "rewards/rejected": -0.6514652967453003, "step": 342 }, { "epoch": 0.45, "learning_rate": 4.849911621265401e-05, "logits/chosen": -2.652245044708252, "logits/rejected": -2.6907477378845215, "logps/chosen": -172.45669555664062, "logps/rejected": -173.8350067138672, "loss": 0.7916, "rewards/accuracies": 0.3125, "rewards/chosen": -0.713798999786377, "rewards/margins": -0.09893038123846054, "rewards/rejected": -0.6148686408996582, "step": 343 }, { "epoch": 0.45, "learning_rate": 4.848686424164953e-05, "logits/chosen": -2.703127384185791, "logits/rejected": -2.704859733581543, "logps/chosen": -214.36392211914062, "logps/rejected": -185.55931091308594, "loss": 0.7281, "rewards/accuracies": 0.5, "rewards/chosen": -0.7356032133102417, "rewards/margins": -0.018105890601873398, "rewards/rejected": -0.7174972891807556, "step": 344 }, { "epoch": 0.45, "learning_rate": 4.84745640266117e-05, "logits/chosen": -2.690885066986084, "logits/rejected": -2.740234851837158, "logps/chosen": -176.29266357421875, "logps/rejected": -206.47596740722656, "loss": 0.5979, "rewards/accuracies": 0.625, "rewards/chosen": -0.6022865176200867, "rewards/margins": 0.31845831871032715, "rewards/rejected": -0.9207448363304138, "step": 345 }, { "epoch": 0.45, "learning_rate": 4.846221559280624e-05, "logits/chosen": -2.6713180541992188, "logits/rejected": -2.7118892669677734, "logps/chosen": -144.1902313232422, "logps/rejected": -170.08709716796875, "loss": 0.7913, "rewards/accuracies": 0.4375, "rewards/chosen": -0.737549364566803, "rewards/margins": -0.0928279459476471, "rewards/rejected": -0.6447213888168335, "step": 346 }, { "epoch": 0.45, "learning_rate": 4.844981896559787e-05, "logits/chosen": -2.7589731216430664, "logits/rejected": -2.6973843574523926, "logps/chosen": -185.05255126953125, "logps/rejected": -177.13946533203125, "loss": 0.7678, "rewards/accuracies": 0.5, "rewards/chosen": -0.8261479735374451, "rewards/margins": -0.09880837798118591, "rewards/rejected": -0.7273396253585815, "step": 347 }, { "epoch": 0.46, "learning_rate": 4.8437374170450344e-05, "logits/chosen": -2.659578323364258, "logits/rejected": -2.617631196975708, "logps/chosen": -175.1558074951172, "logps/rejected": -198.41343688964844, "loss": 0.6794, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8928545117378235, "rewards/margins": 0.10537480562925339, "rewards/rejected": -0.9982293844223022, "step": 348 }, { "epoch": 0.46, "learning_rate": 4.842488123292632e-05, "logits/chosen": -2.6015379428863525, "logits/rejected": -2.5835628509521484, "logps/chosen": -188.88092041015625, "logps/rejected": -183.85733032226562, "loss": 0.6908, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7475460171699524, "rewards/margins": 0.20796708762645721, "rewards/rejected": -0.9555131196975708, "step": 349 }, { "epoch": 0.46, "learning_rate": 4.8412340178687374e-05, "logits/chosen": -2.652589797973633, "logits/rejected": -2.7427022457122803, "logps/chosen": -198.62112426757812, "logps/rejected": -206.33192443847656, "loss": 0.5545, "rewards/accuracies": 0.75, "rewards/chosen": -0.6447871923446655, "rewards/margins": 0.38781607151031494, "rewards/rejected": -1.0326032638549805, "step": 350 }, { "epoch": 0.46, "learning_rate": 4.839975103349391e-05, "logits/chosen": -2.79940128326416, "logits/rejected": -2.8857078552246094, "logps/chosen": -206.7621307373047, "logps/rejected": -231.99472045898438, "loss": 0.5811, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5604078769683838, "rewards/margins": 0.5052842497825623, "rewards/rejected": -1.0656920671463013, "step": 351 }, { "epoch": 0.46, "learning_rate": 4.8387113823205096e-05, "logits/chosen": -2.562626361846924, "logits/rejected": -2.5439820289611816, "logps/chosen": -198.9171142578125, "logps/rejected": -184.92816162109375, "loss": 0.7251, "rewards/accuracies": 0.5, "rewards/chosen": -0.6547099351882935, "rewards/margins": 0.07420587539672852, "rewards/rejected": -0.7289157509803772, "step": 352 }, { "epoch": 0.46, "learning_rate": 4.8374428573778864e-05, "logits/chosen": -2.6110103130340576, "logits/rejected": -2.612607479095459, "logps/chosen": -176.07400512695312, "logps/rejected": -181.38418579101562, "loss": 0.8065, "rewards/accuracies": 0.5, "rewards/chosen": -0.758232593536377, "rewards/margins": -0.0620691180229187, "rewards/rejected": -0.6961634755134583, "step": 353 }, { "epoch": 0.46, "learning_rate": 4.8361695311271795e-05, "logits/chosen": -2.8273181915283203, "logits/rejected": -2.830404758453369, "logps/chosen": -205.66763305664062, "logps/rejected": -222.3781280517578, "loss": 0.671, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8250411152839661, "rewards/margins": 0.13250316679477692, "rewards/rejected": -0.9575443267822266, "step": 354 }, { "epoch": 0.46, "learning_rate": 4.83489140618391e-05, "logits/chosen": -2.666761875152588, "logits/rejected": -2.757248878479004, "logps/chosen": -215.3235626220703, "logps/rejected": -216.06568908691406, "loss": 0.6942, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7060136795043945, "rewards/margins": 0.16570770740509033, "rewards/rejected": -0.8717214465141296, "step": 355 }, { "epoch": 0.47, "learning_rate": 4.833608485173457e-05, "logits/chosen": -2.3725719451904297, "logits/rejected": -2.4071693420410156, "logps/chosen": -146.78704833984375, "logps/rejected": -186.87950134277344, "loss": 0.6066, "rewards/accuracies": 0.75, "rewards/chosen": -0.4698937237262726, "rewards/margins": 0.2167559713125229, "rewards/rejected": -0.6866496801376343, "step": 356 }, { "epoch": 0.47, "learning_rate": 4.8323207707310496e-05, "logits/chosen": -2.6677677631378174, "logits/rejected": -2.5784990787506104, "logps/chosen": -204.05075073242188, "logps/rejected": -224.5699920654297, "loss": 0.5351, "rewards/accuracies": 0.75, "rewards/chosen": -0.4006405174732208, "rewards/margins": 0.5226905941963196, "rewards/rejected": -0.923331081867218, "step": 357 }, { "epoch": 0.47, "learning_rate": 4.831028265501764e-05, "logits/chosen": -2.6300296783447266, "logits/rejected": -2.810920238494873, "logps/chosen": -177.45635986328125, "logps/rejected": -234.4469451904297, "loss": 0.5034, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4587719440460205, "rewards/margins": 0.5265656113624573, "rewards/rejected": -0.9853376150131226, "step": 358 }, { "epoch": 0.47, "learning_rate": 4.829730972140517e-05, "logits/chosen": -2.5978078842163086, "logits/rejected": -2.570225954055786, "logps/chosen": -133.3565673828125, "logps/rejected": -136.15902709960938, "loss": 0.632, "rewards/accuracies": 0.5, "rewards/chosen": -0.3776158094406128, "rewards/margins": 0.1921352744102478, "rewards/rejected": -0.5697510242462158, "step": 359 }, { "epoch": 0.47, "learning_rate": 4.8284288933120594e-05, "logits/chosen": -2.5982682704925537, "logits/rejected": -2.6651642322540283, "logps/chosen": -181.2775115966797, "logps/rejected": -203.33876037597656, "loss": 0.5832, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4960383474826813, "rewards/margins": 0.31738704442977905, "rewards/rejected": -0.8134254217147827, "step": 360 }, { "epoch": 0.47, "learning_rate": 4.8271220316909735e-05, "logits/chosen": -2.761739730834961, "logits/rejected": -2.715710401535034, "logps/chosen": -191.56158447265625, "logps/rejected": -175.9281463623047, "loss": 0.7011, "rewards/accuracies": 0.5625, "rewards/chosen": -0.62202388048172, "rewards/margins": 0.03054755926132202, "rewards/rejected": -0.652571439743042, "step": 361 }, { "epoch": 0.47, "learning_rate": 4.825810389961666e-05, "logits/chosen": -2.614866018295288, "logits/rejected": -2.6899471282958984, "logps/chosen": -135.42494201660156, "logps/rejected": -168.2887725830078, "loss": 0.6542, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5677900910377502, "rewards/margins": 0.13706976175308228, "rewards/rejected": -0.7048598527908325, "step": 362 }, { "epoch": 0.48, "learning_rate": 4.8244939708183596e-05, "logits/chosen": -2.643815040588379, "logits/rejected": -2.6820266246795654, "logps/chosen": -178.99908447265625, "logps/rejected": -212.68692016601562, "loss": 0.5719, "rewards/accuracies": 0.75, "rewards/chosen": -0.5018581748008728, "rewards/margins": 0.3394116163253784, "rewards/rejected": -0.841269850730896, "step": 363 }, { "epoch": 0.48, "learning_rate": 4.823172776965094e-05, "logits/chosen": -2.702993869781494, "logits/rejected": -2.7458367347717285, "logps/chosen": -239.1004638671875, "logps/rejected": -227.72450256347656, "loss": 0.6493, "rewards/accuracies": 0.625, "rewards/chosen": -0.4328669309616089, "rewards/margins": 0.1465311497449875, "rewards/rejected": -0.5793980360031128, "step": 364 }, { "epoch": 0.48, "learning_rate": 4.821846811115713e-05, "logits/chosen": -2.658914804458618, "logits/rejected": -2.728074073791504, "logps/chosen": -207.8568115234375, "logps/rejected": -194.11788940429688, "loss": 0.643, "rewards/accuracies": 0.625, "rewards/chosen": -0.5129541754722595, "rewards/margins": 0.19354072213172913, "rewards/rejected": -0.706494927406311, "step": 365 }, { "epoch": 0.48, "learning_rate": 4.820516075993865e-05, "logits/chosen": -2.741415023803711, "logits/rejected": -2.6675500869750977, "logps/chosen": -156.90858459472656, "logps/rejected": -166.85830688476562, "loss": 0.8793, "rewards/accuracies": 0.3125, "rewards/chosen": -0.9281965494155884, "rewards/margins": -0.2124374508857727, "rewards/rejected": -0.7157591581344604, "step": 366 }, { "epoch": 0.48, "learning_rate": 4.819180574332994e-05, "logits/chosen": -2.7196927070617676, "logits/rejected": -2.748945713043213, "logps/chosen": -202.15493774414062, "logps/rejected": -194.124755859375, "loss": 0.7879, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8033719062805176, "rewards/margins": -0.02236950770020485, "rewards/rejected": -0.7810022830963135, "step": 367 }, { "epoch": 0.48, "learning_rate": 4.8178403088763355e-05, "logits/chosen": -2.678246259689331, "logits/rejected": -2.729729652404785, "logps/chosen": -160.1791534423828, "logps/rejected": -185.8133544921875, "loss": 0.7682, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5224413871765137, "rewards/margins": -0.03584878519177437, "rewards/rejected": -0.4865925908088684, "step": 368 }, { "epoch": 0.48, "learning_rate": 4.8164952823769085e-05, "logits/chosen": -2.649317979812622, "logits/rejected": -2.655850410461426, "logps/chosen": -241.83953857421875, "logps/rejected": -226.36309814453125, "loss": 0.6643, "rewards/accuracies": 0.5, "rewards/chosen": -0.6058496236801147, "rewards/margins": 0.16187947988510132, "rewards/rejected": -0.7677291035652161, "step": 369 }, { "epoch": 0.48, "learning_rate": 4.815145497597514e-05, "logits/chosen": -2.711923837661743, "logits/rejected": -2.7140746116638184, "logps/chosen": -157.05551147460938, "logps/rejected": -153.10430908203125, "loss": 0.7461, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6182957291603088, "rewards/margins": -0.003988802433013916, "rewards/rejected": -0.6143069267272949, "step": 370 }, { "epoch": 0.49, "learning_rate": 4.8137909573107246e-05, "logits/chosen": -2.632993459701538, "logits/rejected": -2.7051479816436768, "logps/chosen": -193.94879150390625, "logps/rejected": -189.07920837402344, "loss": 0.6392, "rewards/accuracies": 0.5, "rewards/chosen": -0.6661685705184937, "rewards/margins": 0.24933494627475739, "rewards/rejected": -0.9155035614967346, "step": 371 }, { "epoch": 0.49, "learning_rate": 4.812431664298883e-05, "logits/chosen": -2.8046534061431885, "logits/rejected": -2.7321465015411377, "logps/chosen": -205.9781951904297, "logps/rejected": -212.83636474609375, "loss": 0.6998, "rewards/accuracies": 0.5, "rewards/chosen": -0.49066948890686035, "rewards/margins": 0.02486901544034481, "rewards/rejected": -0.5155385136604309, "step": 372 }, { "epoch": 0.49, "learning_rate": 4.811067621354094e-05, "logits/chosen": -2.471921920776367, "logits/rejected": -2.464104175567627, "logps/chosen": -146.0660400390625, "logps/rejected": -147.8424835205078, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": -0.5911726951599121, "rewards/margins": 0.050756677985191345, "rewards/rejected": -0.6419293284416199, "step": 373 }, { "epoch": 0.49, "learning_rate": 4.8096988312782174e-05, "logits/chosen": -2.733851671218872, "logits/rejected": -2.734400510787964, "logps/chosen": -201.3523712158203, "logps/rejected": -213.01361083984375, "loss": 0.6494, "rewards/accuracies": 0.625, "rewards/chosen": -0.4939979016780853, "rewards/margins": 0.25225183367729187, "rewards/rejected": -0.7462497353553772, "step": 374 }, { "epoch": 0.49, "learning_rate": 4.8083252968828665e-05, "logits/chosen": -2.5853540897369385, "logits/rejected": -2.5439412593841553, "logps/chosen": -240.09124755859375, "logps/rejected": -200.9342803955078, "loss": 0.7954, "rewards/accuracies": 0.5, "rewards/chosen": -0.7007974982261658, "rewards/margins": -0.10030128061771393, "rewards/rejected": -0.6004961133003235, "step": 375 }, { "epoch": 0.49, "learning_rate": 4.8069470209893974e-05, "logits/chosen": -2.4914681911468506, "logits/rejected": -2.5363824367523193, "logps/chosen": -176.0529327392578, "logps/rejected": -212.58383178710938, "loss": 0.59, "rewards/accuracies": 0.75, "rewards/chosen": -0.4798307418823242, "rewards/margins": 0.2897476851940155, "rewards/rejected": -0.7695784568786621, "step": 376 }, { "epoch": 0.49, "learning_rate": 4.8055640064289086e-05, "logits/chosen": -2.627121686935425, "logits/rejected": -2.6512184143066406, "logps/chosen": -174.460205078125, "logps/rejected": -205.20346069335938, "loss": 0.6553, "rewards/accuracies": 0.625, "rewards/chosen": -0.5525593757629395, "rewards/margins": 0.1847618669271469, "rewards/rejected": -0.7373212575912476, "step": 377 }, { "epoch": 0.49, "learning_rate": 4.80417625604223e-05, "logits/chosen": -2.405069589614868, "logits/rejected": -2.5229604244232178, "logps/chosen": -179.3883819580078, "logps/rejected": -195.5463409423828, "loss": 0.5946, "rewards/accuracies": 0.75, "rewards/chosen": -0.7376325726509094, "rewards/margins": 0.28595036268234253, "rewards/rejected": -1.023582935333252, "step": 378 }, { "epoch": 0.5, "learning_rate": 4.8027837726799205e-05, "logits/chosen": -2.6785571575164795, "logits/rejected": -2.725235939025879, "logps/chosen": -177.39048767089844, "logps/rejected": -180.63433837890625, "loss": 0.7008, "rewards/accuracies": 0.5, "rewards/chosen": -0.4321979880332947, "rewards/margins": 0.021436618641018867, "rewards/rejected": -0.4536346197128296, "step": 379 }, { "epoch": 0.5, "learning_rate": 4.801386559202259e-05, "logits/chosen": -2.338463544845581, "logits/rejected": -2.3258635997772217, "logps/chosen": -136.97865295410156, "logps/rejected": -175.3179168701172, "loss": 0.6352, "rewards/accuracies": 0.625, "rewards/chosen": -0.3434058427810669, "rewards/margins": 0.15200775861740112, "rewards/rejected": -0.495413601398468, "step": 380 }, { "epoch": 0.5, "learning_rate": 4.799984618479242e-05, "logits/chosen": -2.6278395652770996, "logits/rejected": -2.508350372314453, "logps/chosen": -154.35214233398438, "logps/rejected": -188.7537841796875, "loss": 0.6372, "rewards/accuracies": 0.5, "rewards/chosen": -0.4511483311653137, "rewards/margins": 0.18060770630836487, "rewards/rejected": -0.6317560076713562, "step": 381 }, { "epoch": 0.5, "learning_rate": 4.798577953390577e-05, "logits/chosen": -2.561044931411743, "logits/rejected": -2.6394059658050537, "logps/chosen": -197.1724395751953, "logps/rejected": -189.59738159179688, "loss": 0.6657, "rewards/accuracies": 0.5, "rewards/chosen": -0.5811476111412048, "rewards/margins": 0.09012848883867264, "rewards/rejected": -0.6712760329246521, "step": 382 }, { "epoch": 0.5, "learning_rate": 4.797166566825675e-05, "logits/chosen": -2.4525938034057617, "logits/rejected": -2.509936809539795, "logps/chosen": -134.09764099121094, "logps/rejected": -151.4327850341797, "loss": 0.6024, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24274428188800812, "rewards/margins": 0.2151957005262375, "rewards/rejected": -0.4579399824142456, "step": 383 }, { "epoch": 0.5, "learning_rate": 4.795750461683644e-05, "logits/chosen": -2.5733039379119873, "logits/rejected": -2.624908924102783, "logps/chosen": -152.46946716308594, "logps/rejected": -176.73748779296875, "loss": 0.7849, "rewards/accuracies": 0.5, "rewards/chosen": -0.5938777923583984, "rewards/margins": -0.11171096563339233, "rewards/rejected": -0.4821667969226837, "step": 384 }, { "epoch": 0.5, "learning_rate": 4.794329640873285e-05, "logits/chosen": -2.552907705307007, "logits/rejected": -2.564852237701416, "logps/chosen": -215.24732971191406, "logps/rejected": -214.79995727539062, "loss": 0.725, "rewards/accuracies": 0.4375, "rewards/chosen": -0.522678017616272, "rewards/margins": 0.005169212818145752, "rewards/rejected": -0.527847170829773, "step": 385 }, { "epoch": 0.51, "learning_rate": 4.7929041073130867e-05, "logits/chosen": -2.681658983230591, "logits/rejected": -2.733393669128418, "logps/chosen": -154.22958374023438, "logps/rejected": -182.50245666503906, "loss": 0.6893, "rewards/accuracies": 0.5625, "rewards/chosen": -0.47931969165802, "rewards/margins": 0.07224328815937042, "rewards/rejected": -0.5515629649162292, "step": 386 }, { "epoch": 0.51, "learning_rate": 4.7914738639312165e-05, "logits/chosen": -2.462939739227295, "logits/rejected": -2.5003418922424316, "logps/chosen": -152.27236938476562, "logps/rejected": -162.45211791992188, "loss": 0.6494, "rewards/accuracies": 0.5, "rewards/chosen": -0.47114118933677673, "rewards/margins": 0.16562382876873016, "rewards/rejected": -0.6367650628089905, "step": 387 }, { "epoch": 0.51, "learning_rate": 4.790038913665519e-05, "logits/chosen": -2.4999754428863525, "logits/rejected": -2.5065388679504395, "logps/chosen": -213.994873046875, "logps/rejected": -214.79046630859375, "loss": 0.6382, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5417348146438599, "rewards/margins": 0.19294968247413635, "rewards/rejected": -0.7346844673156738, "step": 388 }, { "epoch": 0.51, "learning_rate": 4.788599259463502e-05, "logits/chosen": -2.5474274158477783, "logits/rejected": -2.5991437435150146, "logps/chosen": -180.50039672851562, "logps/rejected": -225.59510803222656, "loss": 0.6276, "rewards/accuracies": 0.625, "rewards/chosen": -0.7096745371818542, "rewards/margins": 0.2028300166130066, "rewards/rejected": -0.9125044941902161, "step": 389 }, { "epoch": 0.51, "learning_rate": 4.787154904282341e-05, "logits/chosen": -2.5795090198516846, "logits/rejected": -2.577873468399048, "logps/chosen": -163.51168823242188, "logps/rejected": -140.55410766601562, "loss": 0.794, "rewards/accuracies": 0.25, "rewards/chosen": -0.5473388433456421, "rewards/margins": -0.11595198512077332, "rewards/rejected": -0.4313868582248688, "step": 390 }, { "epoch": 0.51, "learning_rate": 4.7857058510888645e-05, "logits/chosen": -2.516535758972168, "logits/rejected": -2.5428926944732666, "logps/chosen": -208.22772216796875, "logps/rejected": -231.32977294921875, "loss": 0.6828, "rewards/accuracies": 0.5, "rewards/chosen": -0.5902161598205566, "rewards/margins": 0.16028547286987305, "rewards/rejected": -0.7505015730857849, "step": 391 }, { "epoch": 0.51, "learning_rate": 4.7842521028595526e-05, "logits/chosen": -2.379845380783081, "logits/rejected": -2.3832809925079346, "logps/chosen": -200.7274169921875, "logps/rejected": -164.21742248535156, "loss": 0.6737, "rewards/accuracies": 0.75, "rewards/chosen": -0.5952718257904053, "rewards/margins": 0.09693758189678192, "rewards/rejected": -0.6922094225883484, "step": 392 }, { "epoch": 0.51, "learning_rate": 4.7827936625805284e-05, "logits/chosen": -2.6402270793914795, "logits/rejected": -2.7206149101257324, "logps/chosen": -166.23167419433594, "logps/rejected": -214.43209838867188, "loss": 0.6564, "rewards/accuracies": 0.625, "rewards/chosen": -0.5481399893760681, "rewards/margins": 0.19679725170135498, "rewards/rejected": -0.7449373006820679, "step": 393 }, { "epoch": 0.52, "learning_rate": 4.7813305332475535e-05, "logits/chosen": -2.5474841594696045, "logits/rejected": -2.398405075073242, "logps/chosen": -184.8474884033203, "logps/rejected": -174.7438507080078, "loss": 0.814, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6769263744354248, "rewards/margins": -0.10798013210296631, "rewards/rejected": -0.5689462423324585, "step": 394 }, { "epoch": 0.52, "learning_rate": 4.77986271786602e-05, "logits/chosen": -2.4804301261901855, "logits/rejected": -2.41290545463562, "logps/chosen": -196.86683654785156, "logps/rejected": -196.78880310058594, "loss": 0.7094, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5920239686965942, "rewards/margins": 0.030932970345020294, "rewards/rejected": -0.6229569315910339, "step": 395 }, { "epoch": 0.52, "learning_rate": 4.778390219450949e-05, "logits/chosen": -2.37176775932312, "logits/rejected": -2.5298891067504883, "logps/chosen": -161.76962280273438, "logps/rejected": -179.2218475341797, "loss": 0.6622, "rewards/accuracies": 0.625, "rewards/chosen": -0.6016180515289307, "rewards/margins": 0.17984583973884583, "rewards/rejected": -0.7814638614654541, "step": 396 }, { "epoch": 0.52, "learning_rate": 4.776913041026976e-05, "logits/chosen": -2.4908242225646973, "logits/rejected": -2.6029117107391357, "logps/chosen": -147.92144775390625, "logps/rejected": -160.30494689941406, "loss": 0.7192, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4882547855377197, "rewards/margins": 0.03175659850239754, "rewards/rejected": -0.5200113654136658, "step": 397 }, { "epoch": 0.52, "learning_rate": 4.775431185628353e-05, "logits/chosen": -2.4055118560791016, "logits/rejected": -2.442111015319824, "logps/chosen": -168.19540405273438, "logps/rejected": -166.55137634277344, "loss": 0.7503, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6233083605766296, "rewards/margins": -0.03455538675189018, "rewards/rejected": -0.5887529850006104, "step": 398 }, { "epoch": 0.52, "learning_rate": 4.7739446562989384e-05, "logits/chosen": -2.4722681045532227, "logits/rejected": -2.572155237197876, "logps/chosen": -207.08755493164062, "logps/rejected": -210.96417236328125, "loss": 0.6213, "rewards/accuracies": 0.625, "rewards/chosen": -0.5305930376052856, "rewards/margins": 0.21656833589076996, "rewards/rejected": -0.7471613883972168, "step": 399 }, { "epoch": 0.52, "learning_rate": 4.772453456092191e-05, "logits/chosen": -2.158127546310425, "logits/rejected": -2.197726249694824, "logps/chosen": -188.421142578125, "logps/rejected": -190.73631286621094, "loss": 0.6757, "rewards/accuracies": 0.5, "rewards/chosen": -0.4641679525375366, "rewards/margins": 0.13141167163848877, "rewards/rejected": -0.5955795645713806, "step": 400 }, { "epoch": 0.52, "learning_rate": 4.7709575880711634e-05, "logits/chosen": -2.5840139389038086, "logits/rejected": -2.645707130432129, "logps/chosen": -188.22262573242188, "logps/rejected": -196.6393585205078, "loss": 0.6874, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7071235775947571, "rewards/margins": 0.0883268266916275, "rewards/rejected": -0.7954504489898682, "step": 401 }, { "epoch": 0.53, "learning_rate": 4.769457055308497e-05, "logits/chosen": -2.4420454502105713, "logits/rejected": -2.4484505653381348, "logps/chosen": -169.99232482910156, "logps/rejected": -169.7140350341797, "loss": 0.7436, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5594102144241333, "rewards/margins": -0.01718483492732048, "rewards/rejected": -0.5422253608703613, "step": 402 }, { "epoch": 0.53, "learning_rate": 4.767951860886415e-05, "logits/chosen": -2.530424118041992, "logits/rejected": -2.601032257080078, "logps/chosen": -208.61627197265625, "logps/rejected": -235.3765411376953, "loss": 0.671, "rewards/accuracies": 0.5, "rewards/chosen": -0.7119523882865906, "rewards/margins": 0.13131096959114075, "rewards/rejected": -0.8432632684707642, "step": 403 }, { "epoch": 0.53, "learning_rate": 4.766442007896715e-05, "logits/chosen": -2.662515640258789, "logits/rejected": -2.574281930923462, "logps/chosen": -206.23391723632812, "logps/rejected": -185.04434204101562, "loss": 0.6698, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6086325645446777, "rewards/margins": 0.1881732940673828, "rewards/rejected": -0.7968058586120605, "step": 404 }, { "epoch": 0.53, "learning_rate": 4.764927499440767e-05, "logits/chosen": -2.4306862354278564, "logits/rejected": -2.48148512840271, "logps/chosen": -149.6880340576172, "logps/rejected": -164.9370574951172, "loss": 0.6242, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5055115222930908, "rewards/margins": 0.18054383993148804, "rewards/rejected": -0.6860553026199341, "step": 405 }, { "epoch": 0.53, "learning_rate": 4.763408338629498e-05, "logits/chosen": -2.6945927143096924, "logits/rejected": -2.712038278579712, "logps/chosen": -195.50482177734375, "logps/rejected": -207.51431274414062, "loss": 0.7339, "rewards/accuracies": 0.5, "rewards/chosen": -0.7351135015487671, "rewards/margins": 0.00292108952999115, "rewards/rejected": -0.7380346059799194, "step": 406 }, { "epoch": 0.53, "learning_rate": 4.761884528583396e-05, "logits/chosen": -2.4739484786987305, "logits/rejected": -2.5098178386688232, "logps/chosen": -199.3061065673828, "logps/rejected": -180.8711395263672, "loss": 0.7055, "rewards/accuracies": 0.5, "rewards/chosen": -0.6267393827438354, "rewards/margins": 0.034921929240226746, "rewards/rejected": -0.6616613268852234, "step": 407 }, { "epoch": 0.53, "learning_rate": 4.760356072432498e-05, "logits/chosen": -2.5606906414031982, "logits/rejected": -2.6224427223205566, "logps/chosen": -147.893798828125, "logps/rejected": -156.9450225830078, "loss": 0.7825, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8380388021469116, "rewards/margins": -0.05697247013449669, "rewards/rejected": -0.7810662388801575, "step": 408 }, { "epoch": 0.54, "learning_rate": 4.7588229733163834e-05, "logits/chosen": -2.479783058166504, "logits/rejected": -2.5184426307678223, "logps/chosen": -214.60830688476562, "logps/rejected": -204.91534423828125, "loss": 0.6624, "rewards/accuracies": 0.5625, "rewards/chosen": -0.745360255241394, "rewards/margins": 0.17358280718326569, "rewards/rejected": -0.9189431071281433, "step": 409 }, { "epoch": 0.54, "learning_rate": 4.757285234384169e-05, "logits/chosen": -2.5682218074798584, "logits/rejected": -2.5506396293640137, "logps/chosen": -158.36090087890625, "logps/rejected": -163.76829528808594, "loss": 0.7196, "rewards/accuracies": 0.625, "rewards/chosen": -0.7472302913665771, "rewards/margins": 0.03290612995624542, "rewards/rejected": -0.7801364064216614, "step": 410 }, { "epoch": 0.54, "learning_rate": 4.755742858794503e-05, "logits/chosen": -2.476301908493042, "logits/rejected": -2.4283623695373535, "logps/chosen": -180.48712158203125, "logps/rejected": -190.4640655517578, "loss": 0.7285, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5941387414932251, "rewards/margins": -0.008222660049796104, "rewards/rejected": -0.5859161019325256, "step": 411 }, { "epoch": 0.54, "learning_rate": 4.754195849715557e-05, "logits/chosen": -2.4996767044067383, "logits/rejected": -2.5315232276916504, "logps/chosen": -211.7996826171875, "logps/rejected": -221.70632934570312, "loss": 0.6269, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6848435997962952, "rewards/margins": 0.2251822054386139, "rewards/rejected": -0.9100258350372314, "step": 412 }, { "epoch": 0.54, "learning_rate": 4.75264421032502e-05, "logits/chosen": -2.3915770053863525, "logits/rejected": -2.443027973175049, "logps/chosen": -189.36318969726562, "logps/rejected": -234.4507293701172, "loss": 0.59, "rewards/accuracies": 0.625, "rewards/chosen": -0.6983102560043335, "rewards/margins": 0.33968544006347656, "rewards/rejected": -1.03799569606781, "step": 413 }, { "epoch": 0.54, "learning_rate": 4.751087943810093e-05, "logits/chosen": -2.4122979640960693, "logits/rejected": -2.4623374938964844, "logps/chosen": -150.90496826171875, "logps/rejected": -155.33792114257812, "loss": 0.5973, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6139358282089233, "rewards/margins": 0.2593153715133667, "rewards/rejected": -0.87325119972229, "step": 414 }, { "epoch": 0.54, "learning_rate": 4.749527053367481e-05, "logits/chosen": -2.4751663208007812, "logits/rejected": -2.5102648735046387, "logps/chosen": -199.7110595703125, "logps/rejected": -211.5991973876953, "loss": 0.6728, "rewards/accuracies": 0.5, "rewards/chosen": -0.5334303379058838, "rewards/margins": 0.10008269548416138, "rewards/rejected": -0.6335129737854004, "step": 415 }, { "epoch": 0.54, "learning_rate": 4.747961542203386e-05, "logits/chosen": -2.458789110183716, "logits/rejected": -2.4540915489196777, "logps/chosen": -202.07606506347656, "logps/rejected": -233.15286254882812, "loss": 0.7338, "rewards/accuracies": 0.5, "rewards/chosen": -0.7183946371078491, "rewards/margins": -0.020335379987955093, "rewards/rejected": -0.6980592608451843, "step": 416 }, { "epoch": 0.55, "learning_rate": 4.746391413533503e-05, "logits/chosen": -2.577547788619995, "logits/rejected": -2.6487934589385986, "logps/chosen": -200.61962890625, "logps/rejected": -197.4369354248047, "loss": 0.6989, "rewards/accuracies": 0.5, "rewards/chosen": -0.6610112190246582, "rewards/margins": 0.05467906594276428, "rewards/rejected": -0.7156902551651001, "step": 417 }, { "epoch": 0.55, "learning_rate": 4.74481667058301e-05, "logits/chosen": -2.460482120513916, "logits/rejected": -2.428621768951416, "logps/chosen": -173.61727905273438, "logps/rejected": -174.00869750976562, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.6227931380271912, "rewards/margins": 0.061159878969192505, "rewards/rejected": -0.6839529275894165, "step": 418 }, { "epoch": 0.55, "learning_rate": 4.743237316586564e-05, "logits/chosen": -2.7055728435516357, "logits/rejected": -2.694801092147827, "logps/chosen": -233.16864013671875, "logps/rejected": -219.25518798828125, "loss": 0.5653, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7030515670776367, "rewards/margins": 0.3454846143722534, "rewards/rejected": -1.0485361814498901, "step": 419 }, { "epoch": 0.55, "learning_rate": 4.741653354788295e-05, "logits/chosen": -2.5690486431121826, "logits/rejected": -2.5955686569213867, "logps/chosen": -219.93161010742188, "logps/rejected": -219.0428466796875, "loss": 0.6281, "rewards/accuracies": 0.625, "rewards/chosen": -0.6727169156074524, "rewards/margins": 0.190028116106987, "rewards/rejected": -0.8627450466156006, "step": 420 }, { "epoch": 0.55, "learning_rate": 4.7400647884417956e-05, "logits/chosen": -2.4904708862304688, "logits/rejected": -2.62727689743042, "logps/chosen": -178.09906005859375, "logps/rejected": -193.81495666503906, "loss": 0.9252, "rewards/accuracies": 0.5, "rewards/chosen": -0.8763137459754944, "rewards/margins": -0.29989194869995117, "rewards/rejected": -0.5764217376708984, "step": 421 }, { "epoch": 0.55, "learning_rate": 4.7384716208101166e-05, "logits/chosen": -2.443995952606201, "logits/rejected": -2.4667203426361084, "logps/chosen": -187.14244079589844, "logps/rejected": -178.54718017578125, "loss": 0.6852, "rewards/accuracies": 0.5, "rewards/chosen": -0.5054149031639099, "rewards/margins": 0.07299378514289856, "rewards/rejected": -0.5784087777137756, "step": 422 }, { "epoch": 0.55, "learning_rate": 4.736873855165762e-05, "logits/chosen": -2.535909652709961, "logits/rejected": -2.5671615600585938, "logps/chosen": -188.70001220703125, "logps/rejected": -204.35406494140625, "loss": 0.5707, "rewards/accuracies": 0.6875, "rewards/chosen": -0.46517521142959595, "rewards/margins": 0.3614599108695984, "rewards/rejected": -0.8266351222991943, "step": 423 }, { "epoch": 0.55, "learning_rate": 4.735271494790678e-05, "logits/chosen": -2.528953790664673, "logits/rejected": -2.6590449810028076, "logps/chosen": -187.15150451660156, "logps/rejected": -224.23590087890625, "loss": 0.5801, "rewards/accuracies": 0.75, "rewards/chosen": -0.5339723229408264, "rewards/margins": 0.28138864040374756, "rewards/rejected": -0.8153610229492188, "step": 424 }, { "epoch": 0.56, "learning_rate": 4.733664542976253e-05, "logits/chosen": -2.4930217266082764, "logits/rejected": -2.5632638931274414, "logps/chosen": -190.24932861328125, "logps/rejected": -229.89834594726562, "loss": 0.6699, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7542927265167236, "rewards/margins": 0.1013142317533493, "rewards/rejected": -0.8556069731712341, "step": 425 }, { "epoch": 0.56, "learning_rate": 4.732053003023301e-05, "logits/chosen": -2.4712095260620117, "logits/rejected": -2.5324177742004395, "logps/chosen": -174.3723907470703, "logps/rejected": -206.7808837890625, "loss": 0.6939, "rewards/accuracies": 0.625, "rewards/chosen": -0.6538185477256775, "rewards/margins": 0.04219439998269081, "rewards/rejected": -0.6960129141807556, "step": 426 }, { "epoch": 0.56, "learning_rate": 4.730436878242064e-05, "logits/chosen": -2.5648436546325684, "logits/rejected": -2.6675539016723633, "logps/chosen": -201.86000061035156, "logps/rejected": -222.20242309570312, "loss": 0.5946, "rewards/accuracies": 0.8125, "rewards/chosen": -0.752447783946991, "rewards/margins": 0.23879489302635193, "rewards/rejected": -0.9912427067756653, "step": 427 }, { "epoch": 0.56, "learning_rate": 4.7288161719522016e-05, "logits/chosen": -2.55771541595459, "logits/rejected": -2.597944974899292, "logps/chosen": -168.26791381835938, "logps/rejected": -158.33251953125, "loss": 0.647, "rewards/accuracies": 0.5, "rewards/chosen": -0.43088653683662415, "rewards/margins": 0.134020134806633, "rewards/rejected": -0.5649065971374512, "step": 428 }, { "epoch": 0.56, "learning_rate": 4.727190887482783e-05, "logits/chosen": -2.801307201385498, "logits/rejected": -2.7668023109436035, "logps/chosen": -221.6161346435547, "logps/rejected": -212.2379913330078, "loss": 0.6348, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5888996124267578, "rewards/margins": 0.16605983674526215, "rewards/rejected": -0.7549594044685364, "step": 429 }, { "epoch": 0.56, "learning_rate": 4.725561028172282e-05, "logits/chosen": -2.459953784942627, "logits/rejected": -2.4980850219726562, "logps/chosen": -201.91897583007812, "logps/rejected": -208.00759887695312, "loss": 0.6881, "rewards/accuracies": 0.375, "rewards/chosen": -0.6136542558670044, "rewards/margins": 0.06774169206619263, "rewards/rejected": -0.6813960075378418, "step": 430 }, { "epoch": 0.56, "learning_rate": 4.7239265973685696e-05, "logits/chosen": -2.551776647567749, "logits/rejected": -2.5754733085632324, "logps/chosen": -185.41299438476562, "logps/rejected": -187.89947509765625, "loss": 0.6601, "rewards/accuracies": 0.5, "rewards/chosen": -0.5381264090538025, "rewards/margins": 0.11568471044301987, "rewards/rejected": -0.6538110971450806, "step": 431 }, { "epoch": 0.57, "learning_rate": 4.722287598428907e-05, "logits/chosen": -2.588721990585327, "logits/rejected": -2.629152774810791, "logps/chosen": -197.33180236816406, "logps/rejected": -211.42718505859375, "loss": 0.5501, "rewards/accuracies": 0.625, "rewards/chosen": -0.6033443808555603, "rewards/margins": 0.405744731426239, "rewards/rejected": -1.0090891122817993, "step": 432 }, { "epoch": 0.57, "learning_rate": 4.720644034719938e-05, "logits/chosen": -2.5057499408721924, "logits/rejected": -2.6138525009155273, "logps/chosen": -175.42535400390625, "logps/rejected": -199.4925079345703, "loss": 0.723, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8974592685699463, "rewards/margins": 0.013055291026830673, "rewards/rejected": -0.9105146527290344, "step": 433 }, { "epoch": 0.57, "learning_rate": 4.7189959096176825e-05, "logits/chosen": -2.4195330142974854, "logits/rejected": -2.467151165008545, "logps/chosen": -168.0084686279297, "logps/rejected": -181.73574829101562, "loss": 0.8057, "rewards/accuracies": 0.375, "rewards/chosen": -0.7599891424179077, "rewards/margins": -0.08929312229156494, "rewards/rejected": -0.6706960201263428, "step": 434 }, { "epoch": 0.57, "learning_rate": 4.7173432265075334e-05, "logits/chosen": -2.511300802230835, "logits/rejected": -2.510061502456665, "logps/chosen": -195.68515014648438, "logps/rejected": -173.744140625, "loss": 0.6779, "rewards/accuracies": 0.625, "rewards/chosen": -0.8897146582603455, "rewards/margins": 0.1359187215566635, "rewards/rejected": -1.0256333351135254, "step": 435 }, { "epoch": 0.57, "learning_rate": 4.7156859887842416e-05, "logits/chosen": -2.5270705223083496, "logits/rejected": -2.5860514640808105, "logps/chosen": -211.92176818847656, "logps/rejected": -210.06216430664062, "loss": 0.7143, "rewards/accuracies": 0.75, "rewards/chosen": -0.7650929689407349, "rewards/margins": 0.06967158615589142, "rewards/rejected": -0.8347646594047546, "step": 436 }, { "epoch": 0.57, "learning_rate": 4.714024199851915e-05, "logits/chosen": -2.6095833778381348, "logits/rejected": -2.683137893676758, "logps/chosen": -166.6543426513672, "logps/rejected": -197.75425720214844, "loss": 0.7672, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9517545104026794, "rewards/margins": -0.0435512438416481, "rewards/rejected": -0.9082032442092896, "step": 437 }, { "epoch": 0.57, "learning_rate": 4.712357863124013e-05, "logits/chosen": -2.583829641342163, "logits/rejected": -2.4270806312561035, "logps/chosen": -184.97288513183594, "logps/rejected": -151.17689514160156, "loss": 0.8427, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9021130800247192, "rewards/margins": -0.22366446256637573, "rewards/rejected": -0.6784486770629883, "step": 438 }, { "epoch": 0.57, "learning_rate": 4.710686982023332e-05, "logits/chosen": -2.6779322624206543, "logits/rejected": -2.6819846630096436, "logps/chosen": -202.7177734375, "logps/rejected": -180.97412109375, "loss": 0.6769, "rewards/accuracies": 0.625, "rewards/chosen": -0.7709564566612244, "rewards/margins": 0.1275014728307724, "rewards/rejected": -0.8984578251838684, "step": 439 }, { "epoch": 0.58, "learning_rate": 4.709011559982006e-05, "logits/chosen": -2.5081942081451416, "logits/rejected": -2.457127094268799, "logps/chosen": -199.16038513183594, "logps/rejected": -227.66439819335938, "loss": 0.715, "rewards/accuracies": 0.5, "rewards/chosen": -0.8098132014274597, "rewards/margins": 0.005300614982843399, "rewards/rejected": -0.8151137232780457, "step": 440 }, { "epoch": 0.58, "learning_rate": 4.707331600441495e-05, "logits/chosen": -2.5350513458251953, "logits/rejected": -2.498892307281494, "logps/chosen": -169.93370056152344, "logps/rejected": -166.59365844726562, "loss": 0.6809, "rewards/accuracies": 0.5, "rewards/chosen": -0.6930581331253052, "rewards/margins": 0.06999292969703674, "rewards/rejected": -0.7630510330200195, "step": 441 }, { "epoch": 0.58, "learning_rate": 4.705647106852581e-05, "logits/chosen": -2.5681025981903076, "logits/rejected": -2.5515594482421875, "logps/chosen": -232.0432586669922, "logps/rejected": -224.9477081298828, "loss": 0.7262, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9011371731758118, "rewards/margins": 0.08735474199056625, "rewards/rejected": -0.988491952419281, "step": 442 }, { "epoch": 0.58, "learning_rate": 4.7039580826753564e-05, "logits/chosen": -2.538968563079834, "logits/rejected": -2.5133233070373535, "logps/chosen": -158.14520263671875, "logps/rejected": -201.77728271484375, "loss": 0.6074, "rewards/accuracies": 0.75, "rewards/chosen": -0.6365475654602051, "rewards/margins": 0.2535760998725891, "rewards/rejected": -0.8901236057281494, "step": 443 }, { "epoch": 0.58, "learning_rate": 4.7022645313792235e-05, "logits/chosen": -2.5245423316955566, "logits/rejected": -2.588785171508789, "logps/chosen": -211.5054473876953, "logps/rejected": -183.34361267089844, "loss": 0.6541, "rewards/accuracies": 0.625, "rewards/chosen": -0.7383630275726318, "rewards/margins": 0.1513664424419403, "rewards/rejected": -0.8897294998168945, "step": 444 }, { "epoch": 0.58, "learning_rate": 4.700566456442882e-05, "logits/chosen": -2.426429033279419, "logits/rejected": -2.4127559661865234, "logps/chosen": -170.16091918945312, "logps/rejected": -181.26731872558594, "loss": 0.685, "rewards/accuracies": 0.5, "rewards/chosen": -0.555845320224762, "rewards/margins": 0.09964090585708618, "rewards/rejected": -0.6554862260818481, "step": 445 }, { "epoch": 0.58, "learning_rate": 4.6988638613543216e-05, "logits/chosen": -2.5047028064727783, "logits/rejected": -2.6594462394714355, "logps/chosen": -192.5770263671875, "logps/rejected": -196.17718505859375, "loss": 0.6536, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6389065384864807, "rewards/margins": 0.1103520542383194, "rewards/rejected": -0.7492585778236389, "step": 446 }, { "epoch": 0.59, "learning_rate": 4.6971567496108206e-05, "logits/chosen": -2.4204087257385254, "logits/rejected": -2.4866108894348145, "logps/chosen": -186.6705780029297, "logps/rejected": -183.05654907226562, "loss": 0.6868, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7241983413696289, "rewards/margins": 0.062242452055215836, "rewards/rejected": -0.7864408493041992, "step": 447 }, { "epoch": 0.59, "learning_rate": 4.695445124718931e-05, "logits/chosen": -2.599712610244751, "logits/rejected": -2.678144693374634, "logps/chosen": -171.8934326171875, "logps/rejected": -187.55401611328125, "loss": 0.7119, "rewards/accuracies": 0.625, "rewards/chosen": -0.8643355369567871, "rewards/margins": 0.07741285860538483, "rewards/rejected": -0.9417483806610107, "step": 448 }, { "epoch": 0.59, "learning_rate": 4.693728990194479e-05, "logits/chosen": -2.5691065788269043, "logits/rejected": -2.515324592590332, "logps/chosen": -175.92041015625, "logps/rejected": -169.9009552001953, "loss": 0.7915, "rewards/accuracies": 0.375, "rewards/chosen": -0.7048065662384033, "rewards/margins": -0.13590610027313232, "rewards/rejected": -0.5689005851745605, "step": 449 }, { "epoch": 0.59, "learning_rate": 4.692008349562551e-05, "logits/chosen": -2.5656256675720215, "logits/rejected": -2.5296895503997803, "logps/chosen": -181.8729248046875, "logps/rejected": -166.05845642089844, "loss": 0.6528, "rewards/accuracies": 0.625, "rewards/chosen": -0.6643526554107666, "rewards/margins": 0.1753290742635727, "rewards/rejected": -0.8396817445755005, "step": 450 }, { "epoch": 0.59, "learning_rate": 4.690283206357491e-05, "logits/chosen": -2.6595234870910645, "logits/rejected": -2.6016488075256348, "logps/chosen": -227.77488708496094, "logps/rejected": -197.7751007080078, "loss": 0.7501, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9334564208984375, "rewards/margins": 0.008943833410739899, "rewards/rejected": -0.942400336265564, "step": 451 }, { "epoch": 0.59, "learning_rate": 4.6885535641228904e-05, "logits/chosen": -2.6872832775115967, "logits/rejected": -2.6639106273651123, "logps/chosen": -198.11798095703125, "logps/rejected": -215.65846252441406, "loss": 0.6973, "rewards/accuracies": 0.4375, "rewards/chosen": -0.661568284034729, "rewards/margins": 0.10554268211126328, "rewards/rejected": -0.7671110033988953, "step": 452 }, { "epoch": 0.59, "learning_rate": 4.6868194264115833e-05, "logits/chosen": -2.6474506855010986, "logits/rejected": -2.6153347492218018, "logps/chosen": -179.79026794433594, "logps/rejected": -181.140869140625, "loss": 0.668, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7167541980743408, "rewards/margins": 0.10385677218437195, "rewards/rejected": -0.8206108808517456, "step": 453 }, { "epoch": 0.59, "learning_rate": 4.685080796785637e-05, "logits/chosen": -2.6611721515655518, "logits/rejected": -2.5946366786956787, "logps/chosen": -175.3307647705078, "logps/rejected": -182.19512939453125, "loss": 0.6937, "rewards/accuracies": 0.5, "rewards/chosen": -0.5467984080314636, "rewards/margins": 0.12983733415603638, "rewards/rejected": -0.6766356825828552, "step": 454 }, { "epoch": 0.6, "learning_rate": 4.683337678816345e-05, "logits/chosen": -2.564751148223877, "logits/rejected": -2.610701322555542, "logps/chosen": -160.96405029296875, "logps/rejected": -211.45936584472656, "loss": 0.5668, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6448365449905396, "rewards/margins": 0.34849783778190613, "rewards/rejected": -0.9933344125747681, "step": 455 }, { "epoch": 0.6, "learning_rate": 4.6815900760842236e-05, "logits/chosen": -2.5778658390045166, "logits/rejected": -2.650463104248047, "logps/chosen": -178.8852081298828, "logps/rejected": -184.25819396972656, "loss": 0.6166, "rewards/accuracies": 0.75, "rewards/chosen": -0.6312435865402222, "rewards/margins": 0.21195663511753082, "rewards/rejected": -0.8432002067565918, "step": 456 }, { "epoch": 0.6, "learning_rate": 4.679837992178996e-05, "logits/chosen": -2.6255109310150146, "logits/rejected": -2.568089246749878, "logps/chosen": -186.581787109375, "logps/rejected": -179.66636657714844, "loss": 0.7966, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7909551858901978, "rewards/margins": -0.12891662120819092, "rewards/rejected": -0.6620385646820068, "step": 457 }, { "epoch": 0.6, "learning_rate": 4.678081430699594e-05, "logits/chosen": -2.548609972000122, "logits/rejected": -2.5789599418640137, "logps/chosen": -149.14419555664062, "logps/rejected": -175.63099670410156, "loss": 0.672, "rewards/accuracies": 0.375, "rewards/chosen": -0.6500214338302612, "rewards/margins": 0.1032710000872612, "rewards/rejected": -0.7532925009727478, "step": 458 }, { "epoch": 0.6, "learning_rate": 4.676320395254146e-05, "logits/chosen": -2.6516225337982178, "logits/rejected": -2.5997962951660156, "logps/chosen": -179.6166229248047, "logps/rejected": -181.91600036621094, "loss": 0.6797, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5761557221412659, "rewards/margins": 0.09436798840761185, "rewards/rejected": -0.6705237627029419, "step": 459 }, { "epoch": 0.6, "learning_rate": 4.674554889459968e-05, "logits/chosen": -2.547447681427002, "logits/rejected": -2.531512498855591, "logps/chosen": -158.489013671875, "logps/rejected": -158.9821319580078, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.551589846611023, "rewards/margins": 0.06521686166524887, "rewards/rejected": -0.61680668592453, "step": 460 }, { "epoch": 0.6, "learning_rate": 4.672784916943562e-05, "logits/chosen": -2.6113603115081787, "logits/rejected": -2.7179596424102783, "logps/chosen": -161.98703002929688, "logps/rejected": -183.0166778564453, "loss": 0.6035, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4798649549484253, "rewards/margins": 0.25054579973220825, "rewards/rejected": -0.7304107546806335, "step": 461 }, { "epoch": 0.6, "learning_rate": 4.6710104813406034e-05, "logits/chosen": -2.542372703552246, "logits/rejected": -2.522775650024414, "logps/chosen": -179.12545776367188, "logps/rejected": -175.42910766601562, "loss": 0.6264, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5295111536979675, "rewards/margins": 0.1804201453924179, "rewards/rejected": -0.7099313139915466, "step": 462 }, { "epoch": 0.61, "learning_rate": 4.669231586295934e-05, "logits/chosen": -2.604105234146118, "logits/rejected": -2.6281673908233643, "logps/chosen": -201.89901733398438, "logps/rejected": -226.806396484375, "loss": 0.7092, "rewards/accuracies": 0.5, "rewards/chosen": -0.4338040053844452, "rewards/margins": 0.028335200622677803, "rewards/rejected": -0.46213918924331665, "step": 463 }, { "epoch": 0.61, "learning_rate": 4.667448235463557e-05, "logits/chosen": -2.639315605163574, "logits/rejected": -2.572453022003174, "logps/chosen": -211.58755493164062, "logps/rejected": -191.2517547607422, "loss": 0.5992, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4935193359851837, "rewards/margins": 0.28121161460876465, "rewards/rejected": -0.7747309803962708, "step": 464 }, { "epoch": 0.61, "learning_rate": 4.665660432506629e-05, "logits/chosen": -2.577928304672241, "logits/rejected": -2.5669662952423096, "logps/chosen": -200.63458251953125, "logps/rejected": -192.37644958496094, "loss": 0.684, "rewards/accuracies": 0.625, "rewards/chosen": -0.6348070502281189, "rewards/margins": 0.1238020658493042, "rewards/rejected": -0.7586091756820679, "step": 465 }, { "epoch": 0.61, "learning_rate": 4.6638681810974496e-05, "logits/chosen": -2.5102195739746094, "logits/rejected": -2.4570584297180176, "logps/chosen": -184.1721954345703, "logps/rejected": -186.46142578125, "loss": 0.6753, "rewards/accuracies": 0.625, "rewards/chosen": -0.5391435027122498, "rewards/margins": 0.12175430357456207, "rewards/rejected": -0.6608977913856506, "step": 466 }, { "epoch": 0.61, "learning_rate": 4.6620714849174576e-05, "logits/chosen": -2.5037519931793213, "logits/rejected": -2.514277219772339, "logps/chosen": -221.79428100585938, "logps/rejected": -226.41201782226562, "loss": 0.6953, "rewards/accuracies": 0.625, "rewards/chosen": -0.4555547833442688, "rewards/margins": 0.06287840008735657, "rewards/rejected": -0.5184332132339478, "step": 467 }, { "epoch": 0.61, "learning_rate": 4.660270347657219e-05, "logits/chosen": -2.4214844703674316, "logits/rejected": -2.451406717300415, "logps/chosen": -164.8655242919922, "logps/rejected": -202.42665100097656, "loss": 0.6557, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4477751851081848, "rewards/margins": 0.17700761556625366, "rewards/rejected": -0.6247828006744385, "step": 468 }, { "epoch": 0.61, "learning_rate": 4.658464773016428e-05, "logits/chosen": -2.6548826694488525, "logits/rejected": -2.7413270473480225, "logps/chosen": -203.27706909179688, "logps/rejected": -195.96438598632812, "loss": 0.7196, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6162900924682617, "rewards/margins": 0.04476276412606239, "rewards/rejected": -0.6610528230667114, "step": 469 }, { "epoch": 0.62, "learning_rate": 4.6566547647038864e-05, "logits/chosen": -2.6256935596466064, "logits/rejected": -2.544764757156372, "logps/chosen": -197.11715698242188, "logps/rejected": -174.37977600097656, "loss": 0.5997, "rewards/accuracies": 0.75, "rewards/chosen": -0.7993966937065125, "rewards/margins": 0.2561304569244385, "rewards/rejected": -1.0555272102355957, "step": 470 }, { "epoch": 0.62, "learning_rate": 4.6548403264375074e-05, "logits/chosen": -2.4777655601501465, "logits/rejected": -2.466064691543579, "logps/chosen": -206.86097717285156, "logps/rejected": -222.15383911132812, "loss": 0.8362, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8698564767837524, "rewards/margins": -0.1482725888490677, "rewards/rejected": -0.721583902835846, "step": 471 }, { "epoch": 0.62, "learning_rate": 4.6530214619443037e-05, "logits/chosen": -2.5265445709228516, "logits/rejected": -2.5728039741516113, "logps/chosen": -205.71484375, "logps/rejected": -193.68008422851562, "loss": 0.7739, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6624203324317932, "rewards/margins": -0.09644677489995956, "rewards/rejected": -0.5659735202789307, "step": 472 }, { "epoch": 0.62, "learning_rate": 4.6511981749603775e-05, "logits/chosen": -2.4683680534362793, "logits/rejected": -2.553187847137451, "logps/chosen": -188.4405975341797, "logps/rejected": -206.36973571777344, "loss": 0.6291, "rewards/accuracies": 0.4375, "rewards/chosen": -0.44957125186920166, "rewards/margins": 0.21964800357818604, "rewards/rejected": -0.6692192554473877, "step": 473 }, { "epoch": 0.62, "learning_rate": 4.6493704692309175e-05, "logits/chosen": -2.4824113845825195, "logits/rejected": -2.4895737171173096, "logps/chosen": -173.37237548828125, "logps/rejected": -182.10430908203125, "loss": 0.6773, "rewards/accuracies": 0.5, "rewards/chosen": -0.5202856063842773, "rewards/margins": 0.12215665727853775, "rewards/rejected": -0.6424421668052673, "step": 474 }, { "epoch": 0.62, "learning_rate": 4.647538348510189e-05, "logits/chosen": -2.6831271648406982, "logits/rejected": -2.7396373748779297, "logps/chosen": -175.16824340820312, "logps/rejected": -194.82957458496094, "loss": 0.653, "rewards/accuracies": 0.625, "rewards/chosen": -0.6846888065338135, "rewards/margins": 0.17653468251228333, "rewards/rejected": -0.861223578453064, "step": 475 }, { "epoch": 0.62, "learning_rate": 4.645701816561523e-05, "logits/chosen": -2.55379581451416, "logits/rejected": -2.6678338050842285, "logps/chosen": -214.75527954101562, "logps/rejected": -180.83958435058594, "loss": 0.6331, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6419520974159241, "rewards/margins": 0.22850137948989868, "rewards/rejected": -0.8704534769058228, "step": 476 }, { "epoch": 0.62, "learning_rate": 4.643860877157314e-05, "logits/chosen": -2.560502529144287, "logits/rejected": -2.6059296131134033, "logps/chosen": -225.27346801757812, "logps/rejected": -238.6981658935547, "loss": 0.7312, "rewards/accuracies": 0.375, "rewards/chosen": -0.6071743369102478, "rewards/margins": 0.0013379361480474472, "rewards/rejected": -0.6085121631622314, "step": 477 }, { "epoch": 0.63, "learning_rate": 4.642015534079012e-05, "logits/chosen": -2.6502907276153564, "logits/rejected": -2.4651901721954346, "logps/chosen": -194.60215759277344, "logps/rejected": -208.71707153320312, "loss": 0.6499, "rewards/accuracies": 0.4375, "rewards/chosen": -0.41506141424179077, "rewards/margins": 0.14238694310188293, "rewards/rejected": -0.5574483871459961, "step": 478 }, { "epoch": 0.63, "learning_rate": 4.640165791117106e-05, "logits/chosen": -2.597409248352051, "logits/rejected": -2.671415090560913, "logps/chosen": -169.68240356445312, "logps/rejected": -189.24998474121094, "loss": 0.6624, "rewards/accuracies": 0.625, "rewards/chosen": -0.6197757124900818, "rewards/margins": 0.10906066745519638, "rewards/rejected": -0.7288363575935364, "step": 479 }, { "epoch": 0.63, "learning_rate": 4.63831165207113e-05, "logits/chosen": -2.2800724506378174, "logits/rejected": -2.3521509170532227, "logps/chosen": -171.45944213867188, "logps/rejected": -175.93577575683594, "loss": 0.6243, "rewards/accuracies": 0.625, "rewards/chosen": -0.498868465423584, "rewards/margins": 0.19707906246185303, "rewards/rejected": -0.6959475874900818, "step": 480 }, { "epoch": 0.63, "learning_rate": 4.6364531207496426e-05, "logits/chosen": -2.549999713897705, "logits/rejected": -2.5626368522644043, "logps/chosen": -162.32720947265625, "logps/rejected": -192.87612915039062, "loss": 0.7312, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5747624635696411, "rewards/margins": 0.024262502789497375, "rewards/rejected": -0.5990250110626221, "step": 481 }, { "epoch": 0.63, "learning_rate": 4.634590200970227e-05, "logits/chosen": -2.482072591781616, "logits/rejected": -2.6162638664245605, "logps/chosen": -176.58897399902344, "logps/rejected": -207.56654357910156, "loss": 0.6501, "rewards/accuracies": 0.625, "rewards/chosen": -0.7275586128234863, "rewards/margins": 0.14506082236766815, "rewards/rejected": -0.8726193904876709, "step": 482 }, { "epoch": 0.63, "learning_rate": 4.632722896559481e-05, "logits/chosen": -2.668024778366089, "logits/rejected": -2.6363847255706787, "logps/chosen": -263.9557800292969, "logps/rejected": -249.15301513671875, "loss": 0.7493, "rewards/accuracies": 0.625, "rewards/chosen": -1.0229195356369019, "rewards/margins": -0.055171869695186615, "rewards/rejected": -0.967747688293457, "step": 483 }, { "epoch": 0.63, "learning_rate": 4.630851211353007e-05, "logits/chosen": -2.6272599697113037, "logits/rejected": -2.644105911254883, "logps/chosen": -169.70347595214844, "logps/rejected": -194.94564819335938, "loss": 0.6384, "rewards/accuracies": 0.5, "rewards/chosen": -0.7246192693710327, "rewards/margins": 0.25421109795570374, "rewards/rejected": -0.9788303971290588, "step": 484 }, { "epoch": 0.63, "learning_rate": 4.628975149195407e-05, "logits/chosen": -2.530022621154785, "logits/rejected": -2.5307507514953613, "logps/chosen": -165.1283721923828, "logps/rejected": -191.00559997558594, "loss": 0.6223, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6375387907028198, "rewards/margins": 0.26712220907211304, "rewards/rejected": -0.9046609997749329, "step": 485 }, { "epoch": 0.64, "learning_rate": 4.6270947139402744e-05, "logits/chosen": -2.54582142829895, "logits/rejected": -2.5507116317749023, "logps/chosen": -197.43624877929688, "logps/rejected": -200.06396484375, "loss": 0.695, "rewards/accuracies": 0.5, "rewards/chosen": -0.6543585062026978, "rewards/margins": 0.08520226180553436, "rewards/rejected": -0.7395609021186829, "step": 486 }, { "epoch": 0.64, "learning_rate": 4.6252099094501834e-05, "logits/chosen": -2.502086639404297, "logits/rejected": -2.507930040359497, "logps/chosen": -192.9934539794922, "logps/rejected": -207.4755859375, "loss": 0.5737, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5259783864021301, "rewards/margins": 0.3163459002971649, "rewards/rejected": -0.8423242568969727, "step": 487 }, { "epoch": 0.64, "learning_rate": 4.623320739596685e-05, "logits/chosen": -2.5075860023498535, "logits/rejected": -2.537548065185547, "logps/chosen": -180.3726348876953, "logps/rejected": -195.20867919921875, "loss": 0.6342, "rewards/accuracies": 0.625, "rewards/chosen": -0.6498013138771057, "rewards/margins": 0.19693784415721893, "rewards/rejected": -0.8467391729354858, "step": 488 }, { "epoch": 0.64, "learning_rate": 4.621427208260296e-05, "logits/chosen": -2.5124759674072266, "logits/rejected": -2.5460541248321533, "logps/chosen": -170.8051300048828, "logps/rejected": -183.51568603515625, "loss": 0.6292, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7085085511207581, "rewards/margins": 0.24482092261314392, "rewards/rejected": -0.9533295035362244, "step": 489 }, { "epoch": 0.64, "learning_rate": 4.6195293193304915e-05, "logits/chosen": -2.568577289581299, "logits/rejected": -2.730349063873291, "logps/chosen": -157.42147827148438, "logps/rejected": -204.0237274169922, "loss": 0.6416, "rewards/accuracies": 0.5, "rewards/chosen": -0.7542508244514465, "rewards/margins": 0.18506285548210144, "rewards/rejected": -0.9393137693405151, "step": 490 }, { "epoch": 0.64, "learning_rate": 4.6176270767056976e-05, "logits/chosen": -2.658346652984619, "logits/rejected": -2.697397232055664, "logps/chosen": -160.69436645507812, "logps/rejected": -182.4514923095703, "loss": 0.6066, "rewards/accuracies": 0.625, "rewards/chosen": -0.5536147952079773, "rewards/margins": 0.21064530313014984, "rewards/rejected": -0.7642600536346436, "step": 491 }, { "epoch": 0.64, "learning_rate": 4.615720484293286e-05, "logits/chosen": -2.621380090713501, "logits/rejected": -2.5975751876831055, "logps/chosen": -172.44583129882812, "logps/rejected": -168.9969482421875, "loss": 0.7824, "rewards/accuracies": 0.3125, "rewards/chosen": -0.7546152472496033, "rewards/margins": -0.09415875375270844, "rewards/rejected": -0.6604564785957336, "step": 492 }, { "epoch": 0.65, "learning_rate": 4.613809546009558e-05, "logits/chosen": -2.5632882118225098, "logits/rejected": -2.528463840484619, "logps/chosen": -174.4139404296875, "logps/rejected": -178.07061767578125, "loss": 0.7851, "rewards/accuracies": 0.5, "rewards/chosen": -0.783379077911377, "rewards/margins": -0.13342618942260742, "rewards/rejected": -0.64995276927948, "step": 493 }, { "epoch": 0.65, "learning_rate": 4.611894265779748e-05, "logits/chosen": -2.6725411415100098, "logits/rejected": -2.6337125301361084, "logps/chosen": -158.43734741210938, "logps/rejected": -149.51394653320312, "loss": 0.6479, "rewards/accuracies": 0.625, "rewards/chosen": -0.7037851810455322, "rewards/margins": 0.15217718482017517, "rewards/rejected": -0.8559622764587402, "step": 494 }, { "epoch": 0.65, "learning_rate": 4.609974647538003e-05, "logits/chosen": -2.5839614868164062, "logits/rejected": -2.5436882972717285, "logps/chosen": -186.00880432128906, "logps/rejected": -176.95623779296875, "loss": 0.6352, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7761020660400391, "rewards/margins": 0.2549971640110016, "rewards/rejected": -1.0310992002487183, "step": 495 }, { "epoch": 0.65, "learning_rate": 4.608050695227385e-05, "logits/chosen": -2.663245439529419, "logits/rejected": -2.629906415939331, "logps/chosen": -187.883056640625, "logps/rejected": -181.8142852783203, "loss": 0.727, "rewards/accuracies": 0.5, "rewards/chosen": -0.8239506483078003, "rewards/margins": 0.07172393053770065, "rewards/rejected": -0.8956745862960815, "step": 496 }, { "epoch": 0.65, "learning_rate": 4.606122412799857e-05, "logits/chosen": -2.864988327026367, "logits/rejected": -2.8613595962524414, "logps/chosen": -179.66754150390625, "logps/rejected": -183.17547607421875, "loss": 0.7754, "rewards/accuracies": 0.3125, "rewards/chosen": -0.7847499847412109, "rewards/margins": -0.03732617199420929, "rewards/rejected": -0.7474238872528076, "step": 497 }, { "epoch": 0.65, "learning_rate": 4.6041898042162764e-05, "logits/chosen": -2.67081618309021, "logits/rejected": -2.630650281906128, "logps/chosen": -184.00350952148438, "logps/rejected": -213.1297607421875, "loss": 0.7759, "rewards/accuracies": 0.375, "rewards/chosen": -0.9428163170814514, "rewards/margins": -0.10311193764209747, "rewards/rejected": -0.8397043347358704, "step": 498 }, { "epoch": 0.65, "learning_rate": 4.602252873446386e-05, "logits/chosen": -2.5779166221618652, "logits/rejected": -2.657582998275757, "logps/chosen": -255.50413513183594, "logps/rejected": -295.5150146484375, "loss": 0.6389, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6901088356971741, "rewards/margins": 0.2417818158864975, "rewards/rejected": -0.9318906664848328, "step": 499 }, { "epoch": 0.65, "learning_rate": 4.60031162446881e-05, "logits/chosen": -2.6082730293273926, "logits/rejected": -2.7072064876556396, "logps/chosen": -186.39288330078125, "logps/rejected": -197.77581787109375, "loss": 0.7185, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8791942596435547, "rewards/margins": -0.021152300760149956, "rewards/rejected": -0.8580418825149536, "step": 500 }, { "epoch": 0.66, "learning_rate": 4.5983660612710365e-05, "logits/chosen": -2.44614315032959, "logits/rejected": -2.4296889305114746, "logps/chosen": -276.84466552734375, "logps/rejected": -263.2348327636719, "loss": 0.7359, "rewards/accuracies": 0.4375, "rewards/chosen": -0.878140389919281, "rewards/margins": -0.04955806955695152, "rewards/rejected": -0.8285822868347168, "step": 501 }, { "epoch": 0.66, "learning_rate": 4.596416187849423e-05, "logits/chosen": -2.619124412536621, "logits/rejected": -2.6439716815948486, "logps/chosen": -178.675537109375, "logps/rejected": -180.601806640625, "loss": 0.646, "rewards/accuracies": 0.6875, "rewards/chosen": -0.639778733253479, "rewards/margins": 0.13400733470916748, "rewards/rejected": -0.7737860679626465, "step": 502 }, { "epoch": 0.66, "learning_rate": 4.5944620082091745e-05, "logits/chosen": -2.5724973678588867, "logits/rejected": -2.5802013874053955, "logps/chosen": -155.44479370117188, "logps/rejected": -137.69415283203125, "loss": 0.6405, "rewards/accuracies": 0.625, "rewards/chosen": -0.7297097444534302, "rewards/margins": 0.16804258525371552, "rewards/rejected": -0.8977524042129517, "step": 503 }, { "epoch": 0.66, "learning_rate": 4.5925035263643444e-05, "logits/chosen": -2.74474835395813, "logits/rejected": -2.7397449016571045, "logps/chosen": -203.23423767089844, "logps/rejected": -243.91566467285156, "loss": 0.5604, "rewards/accuracies": 0.625, "rewards/chosen": -0.6856958866119385, "rewards/margins": 0.44254228472709656, "rewards/rejected": -1.1282380819320679, "step": 504 }, { "epoch": 0.66, "learning_rate": 4.5905407463378225e-05, "logits/chosen": -2.6757442951202393, "logits/rejected": -2.591583728790283, "logps/chosen": -182.1700897216797, "logps/rejected": -183.5037078857422, "loss": 0.6673, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6767271161079407, "rewards/margins": 0.10155776888132095, "rewards/rejected": -0.7782848477363586, "step": 505 }, { "epoch": 0.66, "learning_rate": 4.588573672161326e-05, "logits/chosen": -2.50715708732605, "logits/rejected": -2.579833507537842, "logps/chosen": -151.3063201904297, "logps/rejected": -178.596923828125, "loss": 0.667, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5291253328323364, "rewards/margins": 0.0967598482966423, "rewards/rejected": -0.6258851289749146, "step": 506 }, { "epoch": 0.66, "learning_rate": 4.586602307875396e-05, "logits/chosen": -2.591066598892212, "logits/rejected": -2.5947020053863525, "logps/chosen": -197.9405517578125, "logps/rejected": -199.92247009277344, "loss": 0.7325, "rewards/accuracies": 0.375, "rewards/chosen": -0.8253288269042969, "rewards/margins": 0.021833447739481926, "rewards/rejected": -0.8471622467041016, "step": 507 }, { "epoch": 0.66, "learning_rate": 4.5846266575293816e-05, "logits/chosen": -2.6202571392059326, "logits/rejected": -2.734771728515625, "logps/chosen": -162.9832000732422, "logps/rejected": -186.2266845703125, "loss": 0.6053, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8867194652557373, "rewards/margins": 0.30634805560112, "rewards/rejected": -1.1930675506591797, "step": 508 }, { "epoch": 0.67, "learning_rate": 4.582646725181441e-05, "logits/chosen": -2.785458564758301, "logits/rejected": -2.7122888565063477, "logps/chosen": -183.4889678955078, "logps/rejected": -173.61659240722656, "loss": 0.738, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8772555589675903, "rewards/margins": -0.023770909756422043, "rewards/rejected": -0.853484570980072, "step": 509 }, { "epoch": 0.67, "learning_rate": 4.580662514898522e-05, "logits/chosen": -2.644120216369629, "logits/rejected": -2.631178855895996, "logps/chosen": -183.57171630859375, "logps/rejected": -197.05075073242188, "loss": 0.6977, "rewards/accuracies": 0.5, "rewards/chosen": -0.8446710705757141, "rewards/margins": 0.08859595656394958, "rewards/rejected": -0.9332669973373413, "step": 510 }, { "epoch": 0.67, "learning_rate": 4.5786740307563636e-05, "logits/chosen": -2.693103790283203, "logits/rejected": -2.724278688430786, "logps/chosen": -200.4249725341797, "logps/rejected": -188.05783081054688, "loss": 0.746, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9058306813240051, "rewards/margins": -0.03762813284993172, "rewards/rejected": -0.8682026863098145, "step": 511 }, { "epoch": 0.67, "learning_rate": 4.576681276839483e-05, "logits/chosen": -2.6434645652770996, "logits/rejected": -2.641044855117798, "logps/chosen": -160.1006317138672, "logps/rejected": -175.09759521484375, "loss": 0.6674, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8065643310546875, "rewards/margins": 0.14114204049110413, "rewards/rejected": -0.9477063417434692, "step": 512 }, { "epoch": 0.67, "learning_rate": 4.574684257241168e-05, "logits/chosen": -2.797175884246826, "logits/rejected": -2.7650790214538574, "logps/chosen": -195.47828674316406, "logps/rejected": -176.28883361816406, "loss": 0.6555, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6963549852371216, "rewards/margins": 0.11164915561676025, "rewards/rejected": -0.8080041408538818, "step": 513 }, { "epoch": 0.67, "learning_rate": 4.572682976063468e-05, "logits/chosen": -2.6579763889312744, "logits/rejected": -2.6137382984161377, "logps/chosen": -196.55987548828125, "logps/rejected": -191.19906616210938, "loss": 0.697, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6601398587226868, "rewards/margins": 0.11734984815120697, "rewards/rejected": -0.7774897217750549, "step": 514 }, { "epoch": 0.67, "learning_rate": 4.5706774374171854e-05, "logits/chosen": -2.549034595489502, "logits/rejected": -2.645655632019043, "logps/chosen": -161.01344299316406, "logps/rejected": -181.75225830078125, "loss": 0.6994, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6926446557044983, "rewards/margins": 0.08336181193590164, "rewards/rejected": -0.7760064005851746, "step": 515 }, { "epoch": 0.68, "learning_rate": 4.56866764542187e-05, "logits/chosen": -2.6017000675201416, "logits/rejected": -2.6268606185913086, "logps/chosen": -196.15757751464844, "logps/rejected": -206.0800323486328, "loss": 0.7811, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7164332866668701, "rewards/margins": -0.03772084414958954, "rewards/rejected": -0.6787124872207642, "step": 516 }, { "epoch": 0.68, "learning_rate": 4.566653604205805e-05, "logits/chosen": -2.723987579345703, "logits/rejected": -2.8009376525878906, "logps/chosen": -170.0558624267578, "logps/rejected": -188.37606811523438, "loss": 0.5947, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8201983571052551, "rewards/margins": 0.3081669807434082, "rewards/rejected": -1.128365397453308, "step": 517 }, { "epoch": 0.68, "learning_rate": 4.5646353179060057e-05, "logits/chosen": -2.6936893463134766, "logits/rejected": -2.5472261905670166, "logps/chosen": -199.6653289794922, "logps/rejected": -171.17935180664062, "loss": 0.7299, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5833121538162231, "rewards/margins": 0.0050363726913928986, "rewards/rejected": -0.5883485674858093, "step": 518 }, { "epoch": 0.68, "learning_rate": 4.562612790668204e-05, "logits/chosen": -2.8099305629730225, "logits/rejected": -2.858193874359131, "logps/chosen": -191.54202270507812, "logps/rejected": -249.16433715820312, "loss": 0.7677, "rewards/accuracies": 0.375, "rewards/chosen": -0.9312639236450195, "rewards/margins": -0.06794437021017075, "rewards/rejected": -0.8633195161819458, "step": 519 }, { "epoch": 0.68, "learning_rate": 4.560586026646845e-05, "logits/chosen": -2.5662684440612793, "logits/rejected": -2.685896396636963, "logps/chosen": -165.303955078125, "logps/rejected": -192.7646484375, "loss": 0.8062, "rewards/accuracies": 0.375, "rewards/chosen": -0.7227790355682373, "rewards/margins": -0.14969179034233093, "rewards/rejected": -0.5730872750282288, "step": 520 }, { "epoch": 0.68, "learning_rate": 4.558555030005075e-05, "logits/chosen": -2.5218491554260254, "logits/rejected": -2.5660927295684814, "logps/chosen": -157.65261840820312, "logps/rejected": -177.45657348632812, "loss": 0.6963, "rewards/accuracies": 0.5, "rewards/chosen": -0.7852264642715454, "rewards/margins": 0.06401225179433823, "rewards/rejected": -0.8492387533187866, "step": 521 }, { "epoch": 0.68, "learning_rate": 4.556519804914736e-05, "logits/chosen": -2.7509756088256836, "logits/rejected": -2.6521804332733154, "logps/chosen": -178.81834411621094, "logps/rejected": -171.83724975585938, "loss": 0.6878, "rewards/accuracies": 0.5, "rewards/chosen": -0.5949856042861938, "rewards/margins": 0.05688241496682167, "rewards/rejected": -0.6518680453300476, "step": 522 }, { "epoch": 0.68, "learning_rate": 4.554480355556354e-05, "logits/chosen": -2.7910945415496826, "logits/rejected": -2.8190691471099854, "logps/chosen": -241.06617736816406, "logps/rejected": -238.115478515625, "loss": 0.7471, "rewards/accuracies": 0.5, "rewards/chosen": -0.7306909561157227, "rewards/margins": -0.022471264004707336, "rewards/rejected": -0.7082197070121765, "step": 523 }, { "epoch": 0.69, "learning_rate": 4.552436686119134e-05, "logits/chosen": -2.630984306335449, "logits/rejected": -2.585669994354248, "logps/chosen": -185.63345336914062, "logps/rejected": -161.37725830078125, "loss": 0.7325, "rewards/accuracies": 0.375, "rewards/chosen": -0.7190293073654175, "rewards/margins": -0.00044285133481025696, "rewards/rejected": -0.7185863852500916, "step": 524 }, { "epoch": 0.69, "learning_rate": 4.550388800800948e-05, "logits/chosen": -2.581951856613159, "logits/rejected": -2.5290775299072266, "logps/chosen": -272.72467041015625, "logps/rejected": -239.87452697753906, "loss": 0.7218, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7720150351524353, "rewards/margins": -0.0015503056347370148, "rewards/rejected": -0.7704647779464722, "step": 525 }, { "epoch": 0.69, "learning_rate": 4.548336703808328e-05, "logits/chosen": -2.6833231449127197, "logits/rejected": -2.715363025665283, "logps/chosen": -194.5189971923828, "logps/rejected": -201.06585693359375, "loss": 0.662, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7054163217544556, "rewards/margins": 0.18057569861412048, "rewards/rejected": -0.8859919309616089, "step": 526 }, { "epoch": 0.69, "learning_rate": 4.546280399356457e-05, "logits/chosen": -2.634359836578369, "logits/rejected": -2.760921001434326, "logps/chosen": -183.7833251953125, "logps/rejected": -212.13803100585938, "loss": 0.638, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7029492259025574, "rewards/margins": 0.2215665578842163, "rewards/rejected": -0.9245157837867737, "step": 527 }, { "epoch": 0.69, "learning_rate": 4.54421989166916e-05, "logits/chosen": -2.6926794052124023, "logits/rejected": -2.64449405670166, "logps/chosen": -185.593994140625, "logps/rejected": -182.61541748046875, "loss": 0.6729, "rewards/accuracies": 0.4375, "rewards/chosen": -0.913753092288971, "rewards/margins": 0.08913831412792206, "rewards/rejected": -1.0028914213180542, "step": 528 }, { "epoch": 0.69, "learning_rate": 4.542155184978898e-05, "logits/chosen": -2.6856396198272705, "logits/rejected": -2.7312099933624268, "logps/chosen": -134.99691772460938, "logps/rejected": -154.96502685546875, "loss": 0.6361, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5297491550445557, "rewards/margins": 0.20353960990905762, "rewards/rejected": -0.7332887649536133, "step": 529 }, { "epoch": 0.69, "learning_rate": 4.540086283526754e-05, "logits/chosen": -2.493201971054077, "logits/rejected": -2.410156488418579, "logps/chosen": -226.10279846191406, "logps/rejected": -228.56134033203125, "loss": 0.7409, "rewards/accuracies": 0.3125, "rewards/chosen": -0.9568287134170532, "rewards/margins": -0.040587056428194046, "rewards/rejected": -0.9162416458129883, "step": 530 }, { "epoch": 0.7, "learning_rate": 4.538013191562431e-05, "logits/chosen": -2.5931506156921387, "logits/rejected": -2.642500162124634, "logps/chosen": -173.00411987304688, "logps/rejected": -200.85208129882812, "loss": 0.7962, "rewards/accuracies": 0.5, "rewards/chosen": -0.7807260751724243, "rewards/margins": -0.091963991522789, "rewards/rejected": -0.6887621283531189, "step": 531 }, { "epoch": 0.7, "learning_rate": 4.5359359133442356e-05, "logits/chosen": -2.560065984725952, "logits/rejected": -2.5762548446655273, "logps/chosen": -172.8507843017578, "logps/rejected": -193.83258056640625, "loss": 0.7569, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7677234411239624, "rewards/margins": 0.06710009276866913, "rewards/rejected": -0.8348235487937927, "step": 532 }, { "epoch": 0.7, "learning_rate": 4.533854453139077e-05, "logits/chosen": -2.7403817176818848, "logits/rejected": -2.8105673789978027, "logps/chosen": -215.16644287109375, "logps/rejected": -243.01504516601562, "loss": 0.68, "rewards/accuracies": 0.5, "rewards/chosen": -0.6151594519615173, "rewards/margins": 0.12640537321567535, "rewards/rejected": -0.7415647506713867, "step": 533 }, { "epoch": 0.7, "learning_rate": 4.5317688152224515e-05, "logits/chosen": -2.3807733058929443, "logits/rejected": -2.496799945831299, "logps/chosen": -217.8972625732422, "logps/rejected": -202.62594604492188, "loss": 0.7236, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8797967433929443, "rewards/margins": 0.07766968756914139, "rewards/rejected": -0.9574664235115051, "step": 534 }, { "epoch": 0.7, "learning_rate": 4.52967900387844e-05, "logits/chosen": -2.475679636001587, "logits/rejected": -2.5895590782165527, "logps/chosen": -168.10928344726562, "logps/rejected": -231.15797424316406, "loss": 0.6107, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6619678139686584, "rewards/margins": 0.2175792008638382, "rewards/rejected": -0.8795469999313354, "step": 535 }, { "epoch": 0.7, "learning_rate": 4.5275850233996925e-05, "logits/chosen": -2.6471853256225586, "logits/rejected": -2.6549582481384277, "logps/chosen": -165.9920654296875, "logps/rejected": -146.60623168945312, "loss": 0.6382, "rewards/accuracies": 0.625, "rewards/chosen": -0.7165468335151672, "rewards/margins": 0.16847476363182068, "rewards/rejected": -0.8850216865539551, "step": 536 }, { "epoch": 0.7, "learning_rate": 4.525486878087426e-05, "logits/chosen": -2.6429080963134766, "logits/rejected": -2.658334732055664, "logps/chosen": -196.52894592285156, "logps/rejected": -198.9338836669922, "loss": 0.6805, "rewards/accuracies": 0.6875, "rewards/chosen": -0.759401798248291, "rewards/margins": 0.09011213481426239, "rewards/rejected": -0.8495139479637146, "step": 537 }, { "epoch": 0.7, "learning_rate": 4.523384572251409e-05, "logits/chosen": -2.7667698860168457, "logits/rejected": -2.823873996734619, "logps/chosen": -162.4055938720703, "logps/rejected": -157.1728057861328, "loss": 0.6762, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8268322348594666, "rewards/margins": 0.13257114589214325, "rewards/rejected": -0.9594033360481262, "step": 538 }, { "epoch": 0.71, "learning_rate": 4.52127811020996e-05, "logits/chosen": -2.6934759616851807, "logits/rejected": -2.6713662147521973, "logps/chosen": -232.28492736816406, "logps/rejected": -225.9707489013672, "loss": 0.6317, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7875734567642212, "rewards/margins": 0.22967377305030823, "rewards/rejected": -1.017247200012207, "step": 539 }, { "epoch": 0.71, "learning_rate": 4.5191674962899314e-05, "logits/chosen": -2.551192045211792, "logits/rejected": -2.4736685752868652, "logps/chosen": -186.871337890625, "logps/rejected": -182.81036376953125, "loss": 0.7075, "rewards/accuracies": 0.375, "rewards/chosen": -0.7137445211410522, "rewards/margins": -0.007666848599910736, "rewards/rejected": -0.7060777544975281, "step": 540 }, { "epoch": 0.71, "learning_rate": 4.5170527348267054e-05, "logits/chosen": -2.664978265762329, "logits/rejected": -2.723323345184326, "logps/chosen": -181.50009155273438, "logps/rejected": -218.21827697753906, "loss": 0.7018, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7352172136306763, "rewards/margins": 0.05204403027892113, "rewards/rejected": -0.7872611880302429, "step": 541 }, { "epoch": 0.71, "learning_rate": 4.5149338301641845e-05, "logits/chosen": -2.622648000717163, "logits/rejected": -2.658144474029541, "logps/chosen": -161.30264282226562, "logps/rejected": -163.90829467773438, "loss": 0.6798, "rewards/accuracies": 0.5, "rewards/chosen": -0.7303928136825562, "rewards/margins": 0.10052241384983063, "rewards/rejected": -0.8309152126312256, "step": 542 }, { "epoch": 0.71, "learning_rate": 4.512810786654779e-05, "logits/chosen": -2.7670962810516357, "logits/rejected": -2.738875389099121, "logps/chosen": -198.58111572265625, "logps/rejected": -200.0714569091797, "loss": 0.655, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6926169395446777, "rewards/margins": 0.17897403240203857, "rewards/rejected": -0.8715909719467163, "step": 543 }, { "epoch": 0.71, "learning_rate": 4.510683608659403e-05, "logits/chosen": -2.6602683067321777, "logits/rejected": -2.5785250663757324, "logps/chosen": -156.00543212890625, "logps/rejected": -148.37716674804688, "loss": 0.6658, "rewards/accuracies": 0.5625, "rewards/chosen": -0.49638113379478455, "rewards/margins": 0.09628915786743164, "rewards/rejected": -0.5926702618598938, "step": 544 }, { "epoch": 0.71, "learning_rate": 4.508552300547463e-05, "logits/chosen": -2.6110527515411377, "logits/rejected": -2.5978469848632812, "logps/chosen": -212.41812133789062, "logps/rejected": -218.17910766601562, "loss": 0.6521, "rewards/accuracies": 0.625, "rewards/chosen": -0.6882189512252808, "rewards/margins": 0.12167090177536011, "rewards/rejected": -0.8098899722099304, "step": 545 }, { "epoch": 0.71, "learning_rate": 4.506416866696848e-05, "logits/chosen": -2.706355571746826, "logits/rejected": -2.8541836738586426, "logps/chosen": -169.2959747314453, "logps/rejected": -200.96258544921875, "loss": 0.5834, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8061919808387756, "rewards/margins": 0.32039594650268555, "rewards/rejected": -1.126587986946106, "step": 546 }, { "epoch": 0.72, "learning_rate": 4.504277311493922e-05, "logits/chosen": -2.6320695877075195, "logits/rejected": -2.7103400230407715, "logps/chosen": -153.4121551513672, "logps/rejected": -177.87684631347656, "loss": 0.6849, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5557705163955688, "rewards/margins": 0.1074688509106636, "rewards/rejected": -0.6632393598556519, "step": 547 }, { "epoch": 0.72, "learning_rate": 4.502133639333516e-05, "logits/chosen": -2.779649019241333, "logits/rejected": -2.880277633666992, "logps/chosen": -169.7550506591797, "logps/rejected": -205.27264404296875, "loss": 0.735, "rewards/accuracies": 0.5, "rewards/chosen": -0.8947506546974182, "rewards/margins": -0.022921644151210785, "rewards/rejected": -0.8718290328979492, "step": 548 }, { "epoch": 0.72, "learning_rate": 4.499985854618915e-05, "logits/chosen": -2.786224126815796, "logits/rejected": -2.7625045776367188, "logps/chosen": -194.6786346435547, "logps/rejected": -171.3389434814453, "loss": 0.717, "rewards/accuracies": 0.375, "rewards/chosen": -0.7077013850212097, "rewards/margins": 0.03108108416199684, "rewards/rejected": -0.7387824058532715, "step": 549 }, { "epoch": 0.72, "learning_rate": 4.497833961761855e-05, "logits/chosen": -2.649867057800293, "logits/rejected": -2.616454839706421, "logps/chosen": -181.42636108398438, "logps/rejected": -187.4470672607422, "loss": 0.6947, "rewards/accuracies": 0.375, "rewards/chosen": -0.5831429958343506, "rewards/margins": 0.03419647365808487, "rewards/rejected": -0.617339551448822, "step": 550 }, { "epoch": 0.72, "learning_rate": 4.495677965182506e-05, "logits/chosen": -2.752405881881714, "logits/rejected": -2.801346778869629, "logps/chosen": -199.30345153808594, "logps/rejected": -204.13937377929688, "loss": 0.6993, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8525046706199646, "rewards/margins": 0.04431546479463577, "rewards/rejected": -0.8968201875686646, "step": 551 }, { "epoch": 0.72, "learning_rate": 4.4935178693094714e-05, "logits/chosen": -2.6438167095184326, "logits/rejected": -2.677015781402588, "logps/chosen": -176.24278259277344, "logps/rejected": -183.13050842285156, "loss": 0.5542, "rewards/accuracies": 0.875, "rewards/chosen": -0.5732276439666748, "rewards/margins": 0.34234461188316345, "rewards/rejected": -0.9155722856521606, "step": 552 }, { "epoch": 0.72, "learning_rate": 4.491353678579774e-05, "logits/chosen": -2.7374353408813477, "logits/rejected": -2.724957227706909, "logps/chosen": -168.60983276367188, "logps/rejected": -178.94168090820312, "loss": 0.6891, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7404319643974304, "rewards/margins": 0.09779490530490875, "rewards/rejected": -0.8382267951965332, "step": 553 }, { "epoch": 0.73, "learning_rate": 4.489185397438845e-05, "logits/chosen": -2.569044351577759, "logits/rejected": -2.4559781551361084, "logps/chosen": -122.60645294189453, "logps/rejected": -121.57733154296875, "loss": 0.7095, "rewards/accuracies": 0.5, "rewards/chosen": -0.5707786679267883, "rewards/margins": 0.018663479015231133, "rewards/rejected": -0.589442253112793, "step": 554 }, { "epoch": 0.73, "learning_rate": 4.4870130303405214e-05, "logits/chosen": -2.6766295433044434, "logits/rejected": -2.6980645656585693, "logps/chosen": -168.9379425048828, "logps/rejected": -186.576904296875, "loss": 0.6257, "rewards/accuracies": 0.625, "rewards/chosen": -0.5627405643463135, "rewards/margins": 0.19951972365379333, "rewards/rejected": -0.7622602581977844, "step": 555 }, { "epoch": 0.73, "learning_rate": 4.484836581747032e-05, "logits/chosen": -2.6670761108398438, "logits/rejected": -2.690931558609009, "logps/chosen": -201.3208770751953, "logps/rejected": -182.73782348632812, "loss": 0.7019, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6702454686164856, "rewards/margins": 0.044957034289836884, "rewards/rejected": -0.7152025103569031, "step": 556 }, { "epoch": 0.73, "learning_rate": 4.4826560561289865e-05, "logits/chosen": -2.5527472496032715, "logits/rejected": -2.5464069843292236, "logps/chosen": -188.2076416015625, "logps/rejected": -173.2044677734375, "loss": 0.6589, "rewards/accuracies": 0.625, "rewards/chosen": -0.6993139982223511, "rewards/margins": 0.14693805575370789, "rewards/rejected": -0.8462520837783813, "step": 557 }, { "epoch": 0.73, "learning_rate": 4.4804714579653736e-05, "logits/chosen": -2.694082736968994, "logits/rejected": -2.6660871505737305, "logps/chosen": -191.5103759765625, "logps/rejected": -182.28424072265625, "loss": 0.828, "rewards/accuracies": 0.5, "rewards/chosen": -0.8855656981468201, "rewards/margins": -0.18190453946590424, "rewards/rejected": -0.7036612033843994, "step": 558 }, { "epoch": 0.73, "learning_rate": 4.4782827917435454e-05, "logits/chosen": -2.8244099617004395, "logits/rejected": -2.811955690383911, "logps/chosen": -202.20606994628906, "logps/rejected": -215.3787841796875, "loss": 0.78, "rewards/accuracies": 0.5, "rewards/chosen": -0.9945522546768188, "rewards/margins": -0.01987658441066742, "rewards/rejected": -0.9746755957603455, "step": 559 }, { "epoch": 0.73, "learning_rate": 4.4760900619592085e-05, "logits/chosen": -2.6444430351257324, "logits/rejected": -2.6651206016540527, "logps/chosen": -176.60488891601562, "logps/rejected": -185.3902130126953, "loss": 0.6646, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8423606157302856, "rewards/margins": 0.12972018122673035, "rewards/rejected": -0.9720807075500488, "step": 560 }, { "epoch": 0.73, "learning_rate": 4.4738932731164194e-05, "logits/chosen": -2.6871867179870605, "logits/rejected": -2.693873405456543, "logps/chosen": -199.2325439453125, "logps/rejected": -196.27951049804688, "loss": 0.7607, "rewards/accuracies": 0.4375, "rewards/chosen": -0.791627049446106, "rewards/margins": -0.03532877564430237, "rewards/rejected": -0.7562982439994812, "step": 561 }, { "epoch": 0.74, "learning_rate": 4.47169242972757e-05, "logits/chosen": -2.7293338775634766, "logits/rejected": -2.7268292903900146, "logps/chosen": -210.5205078125, "logps/rejected": -198.358642578125, "loss": 0.8319, "rewards/accuracies": 0.5, "rewards/chosen": -0.9199119806289673, "rewards/margins": -0.1811569780111313, "rewards/rejected": -0.73875492811203, "step": 562 }, { "epoch": 0.74, "learning_rate": 4.469487536313381e-05, "logits/chosen": -2.591933012008667, "logits/rejected": -2.6240625381469727, "logps/chosen": -159.45765686035156, "logps/rejected": -182.98690795898438, "loss": 0.6585, "rewards/accuracies": 0.625, "rewards/chosen": -0.7039793729782104, "rewards/margins": 0.1428254246711731, "rewards/rejected": -0.8468047380447388, "step": 563 }, { "epoch": 0.74, "learning_rate": 4.467278597402894e-05, "logits/chosen": -2.5494794845581055, "logits/rejected": -2.541128635406494, "logps/chosen": -163.43701171875, "logps/rejected": -166.59567260742188, "loss": 0.6618, "rewards/accuracies": 0.625, "rewards/chosen": -0.5548999905586243, "rewards/margins": 0.15531884133815765, "rewards/rejected": -0.7102188467979431, "step": 564 }, { "epoch": 0.74, "learning_rate": 4.465065617533457e-05, "logits/chosen": -2.6077260971069336, "logits/rejected": -2.5411581993103027, "logps/chosen": -205.11749267578125, "logps/rejected": -210.03114318847656, "loss": 0.7476, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7790584564208984, "rewards/margins": 0.018771033734083176, "rewards/rejected": -0.7978294491767883, "step": 565 }, { "epoch": 0.74, "learning_rate": 4.462848601250722e-05, "logits/chosen": -2.645805835723877, "logits/rejected": -2.5978736877441406, "logps/chosen": -155.65281677246094, "logps/rejected": -156.344970703125, "loss": 0.6091, "rewards/accuracies": 0.75, "rewards/chosen": -0.5091183185577393, "rewards/margins": 0.21079742908477783, "rewards/rejected": -0.7199157476425171, "step": 566 }, { "epoch": 0.74, "learning_rate": 4.4606275531086295e-05, "logits/chosen": -2.783046007156372, "logits/rejected": -2.771733522415161, "logps/chosen": -192.69659423828125, "logps/rejected": -176.84600830078125, "loss": 0.8056, "rewards/accuracies": 0.3125, "rewards/chosen": -0.7212276458740234, "rewards/margins": -0.1634465754032135, "rewards/rejected": -0.5577811002731323, "step": 567 }, { "epoch": 0.74, "learning_rate": 4.4584024776694035e-05, "logits/chosen": -2.6829848289489746, "logits/rejected": -2.69008731842041, "logps/chosen": -160.82737731933594, "logps/rejected": -197.73060607910156, "loss": 0.6111, "rewards/accuracies": 0.75, "rewards/chosen": -0.5301526188850403, "rewards/margins": 0.34297025203704834, "rewards/rejected": -0.8731229305267334, "step": 568 }, { "epoch": 0.74, "learning_rate": 4.45617337950354e-05, "logits/chosen": -2.56026029586792, "logits/rejected": -2.687854290008545, "logps/chosen": -168.34933471679688, "logps/rejected": -196.67616271972656, "loss": 0.6421, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6444015502929688, "rewards/margins": 0.2516985237598419, "rewards/rejected": -0.8961001038551331, "step": 569 }, { "epoch": 0.75, "learning_rate": 4.453940263189797e-05, "logits/chosen": -2.7095446586608887, "logits/rejected": -2.6890676021575928, "logps/chosen": -207.63217163085938, "logps/rejected": -215.61146545410156, "loss": 0.6531, "rewards/accuracies": 0.625, "rewards/chosen": -0.670657753944397, "rewards/margins": 0.11775386333465576, "rewards/rejected": -0.7884116172790527, "step": 570 }, { "epoch": 0.75, "learning_rate": 4.4517031333151874e-05, "logits/chosen": -2.6166491508483887, "logits/rejected": -2.618901014328003, "logps/chosen": -162.6239776611328, "logps/rejected": -197.70130920410156, "loss": 0.6682, "rewards/accuracies": 0.625, "rewards/chosen": -0.7643678784370422, "rewards/margins": 0.09301282465457916, "rewards/rejected": -0.857380747795105, "step": 571 }, { "epoch": 0.75, "learning_rate": 4.449461994474968e-05, "logits/chosen": -2.703096866607666, "logits/rejected": -2.726106882095337, "logps/chosen": -142.5395965576172, "logps/rejected": -187.09109497070312, "loss": 0.6036, "rewards/accuracies": 0.625, "rewards/chosen": -0.5837680697441101, "rewards/margins": 0.24383926391601562, "rewards/rejected": -0.8276073336601257, "step": 572 }, { "epoch": 0.75, "learning_rate": 4.44721685127263e-05, "logits/chosen": -2.6250483989715576, "logits/rejected": -2.651967763900757, "logps/chosen": -170.19578552246094, "logps/rejected": -179.1065673828125, "loss": 0.6918, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6374863386154175, "rewards/margins": 0.08425635099411011, "rewards/rejected": -0.7217426300048828, "step": 573 }, { "epoch": 0.75, "learning_rate": 4.4449677083198896e-05, "logits/chosen": -2.602550745010376, "logits/rejected": -2.712120294570923, "logps/chosen": -157.83306884765625, "logps/rejected": -180.02627563476562, "loss": 0.6177, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6926807761192322, "rewards/margins": 0.24708034098148346, "rewards/rejected": -0.9397611618041992, "step": 574 }, { "epoch": 0.75, "learning_rate": 4.4427145702366804e-05, "logits/chosen": -2.6738860607147217, "logits/rejected": -2.696126937866211, "logps/chosen": -144.033203125, "logps/rejected": -177.09173583984375, "loss": 0.6195, "rewards/accuracies": 0.625, "rewards/chosen": -0.41337087750434875, "rewards/margins": 0.2742076814174652, "rewards/rejected": -0.6875784993171692, "step": 575 }, { "epoch": 0.75, "learning_rate": 4.440457441651139e-05, "logits/chosen": -2.5672965049743652, "logits/rejected": -2.638249635696411, "logps/chosen": -120.28506469726562, "logps/rejected": -148.57440185546875, "loss": 0.6443, "rewards/accuracies": 0.5, "rewards/chosen": -0.5808289051055908, "rewards/margins": 0.20668093860149384, "rewards/rejected": -0.7875099182128906, "step": 576 }, { "epoch": 0.76, "learning_rate": 4.4381963271996044e-05, "logits/chosen": -2.580237865447998, "logits/rejected": -2.6456117630004883, "logps/chosen": -178.21829223632812, "logps/rejected": -236.28274536132812, "loss": 0.6341, "rewards/accuracies": 0.5, "rewards/chosen": -0.7112021446228027, "rewards/margins": 0.18514475226402283, "rewards/rejected": -0.896346926689148, "step": 577 }, { "epoch": 0.76, "learning_rate": 4.435931231526597e-05, "logits/chosen": -2.65242075920105, "logits/rejected": -2.6544103622436523, "logps/chosen": -211.60411071777344, "logps/rejected": -235.25247192382812, "loss": 0.7102, "rewards/accuracies": 0.4375, "rewards/chosen": -0.874238908290863, "rewards/margins": 0.050793394446372986, "rewards/rejected": -0.9250323176383972, "step": 578 }, { "epoch": 0.76, "learning_rate": 4.433662159284818e-05, "logits/chosen": -2.6615688800811768, "logits/rejected": -2.6471145153045654, "logps/chosen": -191.8739471435547, "logps/rejected": -193.9615020751953, "loss": 0.7658, "rewards/accuracies": 0.5, "rewards/chosen": -0.7925047874450684, "rewards/margins": -0.02974768541753292, "rewards/rejected": -0.7627571225166321, "step": 579 }, { "epoch": 0.76, "learning_rate": 4.4313891151351375e-05, "logits/chosen": -2.7032251358032227, "logits/rejected": -2.746817111968994, "logps/chosen": -137.22633361816406, "logps/rejected": -143.8455352783203, "loss": 0.6467, "rewards/accuracies": 0.5, "rewards/chosen": -0.762434184551239, "rewards/margins": 0.17979009449481964, "rewards/rejected": -0.9422242641448975, "step": 580 }, { "epoch": 0.76, "learning_rate": 4.429112103746582e-05, "logits/chosen": -2.7502665519714355, "logits/rejected": -2.6706719398498535, "logps/chosen": -173.37606811523438, "logps/rejected": -231.8168182373047, "loss": 0.6431, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7993868589401245, "rewards/margins": 0.17561480402946472, "rewards/rejected": -0.9750015735626221, "step": 581 }, { "epoch": 0.76, "learning_rate": 4.4268311297963295e-05, "logits/chosen": -2.752943754196167, "logits/rejected": -2.7461259365081787, "logps/chosen": -194.4534912109375, "logps/rejected": -190.17312622070312, "loss": 0.7105, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8308650851249695, "rewards/margins": 0.08960982412099838, "rewards/rejected": -0.9204750061035156, "step": 582 }, { "epoch": 0.76, "learning_rate": 4.4245461979696937e-05, "logits/chosen": -2.6702442169189453, "logits/rejected": -2.6548166275024414, "logps/chosen": -146.55252075195312, "logps/rejected": -167.94862365722656, "loss": 0.6055, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6801164150238037, "rewards/margins": 0.27670958638191223, "rewards/rejected": -0.9568260312080383, "step": 583 }, { "epoch": 0.76, "learning_rate": 4.422257312960123e-05, "logits/chosen": -2.6083147525787354, "logits/rejected": -2.590127944946289, "logps/chosen": -203.12689208984375, "logps/rejected": -296.73431396484375, "loss": 0.6476, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9686077833175659, "rewards/margins": 0.2269180417060852, "rewards/rejected": -1.1955257654190063, "step": 584 }, { "epoch": 0.77, "learning_rate": 4.419964479469182e-05, "logits/chosen": -2.757424831390381, "logits/rejected": -2.861577272415161, "logps/chosen": -225.32345581054688, "logps/rejected": -259.8649597167969, "loss": 0.6847, "rewards/accuracies": 0.5625, "rewards/chosen": -1.280800700187683, "rewards/margins": 0.11630728095769882, "rewards/rejected": -1.3971078395843506, "step": 585 }, { "epoch": 0.77, "learning_rate": 4.417667702206548e-05, "logits/chosen": -2.7444422245025635, "logits/rejected": -2.7280027866363525, "logps/chosen": -192.2440185546875, "logps/rejected": -232.78993225097656, "loss": 0.6904, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8600846529006958, "rewards/margins": 0.0870635136961937, "rewards/rejected": -0.9471482038497925, "step": 586 }, { "epoch": 0.77, "learning_rate": 4.415366985889998e-05, "logits/chosen": -2.7146828174591064, "logits/rejected": -2.74285888671875, "logps/chosen": -218.16299438476562, "logps/rejected": -260.4196472167969, "loss": 0.586, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9427146911621094, "rewards/margins": 0.3159841001033783, "rewards/rejected": -1.2586987018585205, "step": 587 }, { "epoch": 0.77, "learning_rate": 4.413062335245402e-05, "logits/chosen": -2.773749589920044, "logits/rejected": -2.797802686691284, "logps/chosen": -172.09722900390625, "logps/rejected": -212.4481658935547, "loss": 0.6053, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6340043544769287, "rewards/margins": 0.2507874667644501, "rewards/rejected": -0.884791910648346, "step": 588 }, { "epoch": 0.77, "learning_rate": 4.410753755006708e-05, "logits/chosen": -2.5004916191101074, "logits/rejected": -2.52752685546875, "logps/chosen": -148.71890258789062, "logps/rejected": -165.1573944091797, "loss": 0.5859, "rewards/accuracies": 0.75, "rewards/chosen": -0.6494738459587097, "rewards/margins": 0.30606481432914734, "rewards/rejected": -0.9555386304855347, "step": 589 }, { "epoch": 0.77, "learning_rate": 4.408441249915938e-05, "logits/chosen": -2.7454075813293457, "logits/rejected": -2.7361526489257812, "logps/chosen": -173.478271484375, "logps/rejected": -190.50579833984375, "loss": 0.7229, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8305282592773438, "rewards/margins": 0.005613129585981369, "rewards/rejected": -0.8361413478851318, "step": 590 }, { "epoch": 0.77, "learning_rate": 4.4061248247231776e-05, "logits/chosen": -2.5979197025299072, "logits/rejected": -2.723015308380127, "logps/chosen": -188.89573669433594, "logps/rejected": -205.31224060058594, "loss": 0.5976, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0508185625076294, "rewards/margins": 0.3824726641178131, "rewards/rejected": -1.4332913160324097, "step": 591 }, { "epoch": 0.77, "learning_rate": 4.4038044841865614e-05, "logits/chosen": -2.586750030517578, "logits/rejected": -2.719773054122925, "logps/chosen": -157.79367065429688, "logps/rejected": -174.4828643798828, "loss": 0.7857, "rewards/accuracies": 0.5, "rewards/chosen": -1.0488148927688599, "rewards/margins": 0.035450052469968796, "rewards/rejected": -1.084264874458313, "step": 592 }, { "epoch": 0.78, "learning_rate": 4.401480233072268e-05, "logits/chosen": -2.6652653217315674, "logits/rejected": -2.6724514961242676, "logps/chosen": -178.22274780273438, "logps/rejected": -182.68753051757812, "loss": 0.8022, "rewards/accuracies": 0.375, "rewards/chosen": -1.05448317527771, "rewards/margins": -0.15002277493476868, "rewards/rejected": -0.9044604897499084, "step": 593 }, { "epoch": 0.78, "learning_rate": 4.399152076154509e-05, "logits/chosen": -2.73130202293396, "logits/rejected": -2.709644079208374, "logps/chosen": -179.2704620361328, "logps/rejected": -201.22584533691406, "loss": 0.6426, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1346244812011719, "rewards/margins": 0.15731468796730042, "rewards/rejected": -1.2919390201568604, "step": 594 }, { "epoch": 0.78, "learning_rate": 4.396820018215518e-05, "logits/chosen": -2.703145742416382, "logits/rejected": -2.724557876586914, "logps/chosen": -185.3358154296875, "logps/rejected": -200.76834106445312, "loss": 0.6528, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8781923055648804, "rewards/margins": 0.12637923657894135, "rewards/rejected": -1.004571557044983, "step": 595 }, { "epoch": 0.78, "learning_rate": 4.394484064045542e-05, "logits/chosen": -2.7452504634857178, "logits/rejected": -2.7653212547302246, "logps/chosen": -172.15745544433594, "logps/rejected": -198.72596740722656, "loss": 0.6422, "rewards/accuracies": 0.75, "rewards/chosen": -1.0150758028030396, "rewards/margins": 0.16777384281158447, "rewards/rejected": -1.182849645614624, "step": 596 }, { "epoch": 0.78, "learning_rate": 4.392144218442831e-05, "logits/chosen": -2.7004809379577637, "logits/rejected": -2.697211980819702, "logps/chosen": -219.4707794189453, "logps/rejected": -230.44607543945312, "loss": 0.76, "rewards/accuracies": 0.5625, "rewards/chosen": -1.201088309288025, "rewards/margins": -0.015895769000053406, "rewards/rejected": -1.185192584991455, "step": 597 }, { "epoch": 0.78, "learning_rate": 4.3898004862136286e-05, "logits/chosen": -2.7089436054229736, "logits/rejected": -2.7471840381622314, "logps/chosen": -178.98870849609375, "logps/rejected": -186.9143829345703, "loss": 0.7313, "rewards/accuracies": 0.5, "rewards/chosen": -1.2119524478912354, "rewards/margins": 0.017535462975502014, "rewards/rejected": -1.2294878959655762, "step": 598 }, { "epoch": 0.78, "learning_rate": 4.3874528721721624e-05, "logits/chosen": -2.684823751449585, "logits/rejected": -2.6925339698791504, "logps/chosen": -177.93600463867188, "logps/rejected": -196.6461181640625, "loss": 0.6405, "rewards/accuracies": 0.625, "rewards/chosen": -0.6519026160240173, "rewards/margins": 0.23315802216529846, "rewards/rejected": -0.8850606679916382, "step": 599 }, { "epoch": 0.79, "learning_rate": 4.385101381140633e-05, "logits/chosen": -2.646517038345337, "logits/rejected": -2.6656806468963623, "logps/chosen": -133.48558044433594, "logps/rejected": -172.92977905273438, "loss": 0.5829, "rewards/accuracies": 0.625, "rewards/chosen": -0.7560878992080688, "rewards/margins": 0.29395031929016113, "rewards/rejected": -1.0500380992889404, "step": 600 }, { "epoch": 0.79, "learning_rate": 4.382746017949203e-05, "logits/chosen": -2.566793441772461, "logits/rejected": -2.5946078300476074, "logps/chosen": -183.47927856445312, "logps/rejected": -224.47052001953125, "loss": 0.5809, "rewards/accuracies": 0.75, "rewards/chosen": -0.7541132569313049, "rewards/margins": 0.33301299810409546, "rewards/rejected": -1.08712637424469, "step": 601 }, { "epoch": 0.79, "learning_rate": 4.380386787435992e-05, "logits/chosen": -2.5148720741271973, "logits/rejected": -2.431692600250244, "logps/chosen": -172.89552307128906, "logps/rejected": -179.71328735351562, "loss": 0.6558, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9377825260162354, "rewards/margins": 0.14634236693382263, "rewards/rejected": -1.0841249227523804, "step": 602 }, { "epoch": 0.79, "learning_rate": 4.378023694447061e-05, "logits/chosen": -2.536116600036621, "logits/rejected": -2.6387391090393066, "logps/chosen": -143.99391174316406, "logps/rejected": -181.4296875, "loss": 0.5612, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8889681100845337, "rewards/margins": 0.41685062646865845, "rewards/rejected": -1.3058186769485474, "step": 603 }, { "epoch": 0.79, "learning_rate": 4.375656743836407e-05, "logits/chosen": -2.74141526222229, "logits/rejected": -2.6809630393981934, "logps/chosen": -208.21409606933594, "logps/rejected": -211.6444549560547, "loss": 0.8776, "rewards/accuracies": 0.4375, "rewards/chosen": -1.2066153287887573, "rewards/margins": -0.26936572790145874, "rewards/rejected": -0.9372495412826538, "step": 604 }, { "epoch": 0.79, "learning_rate": 4.373285940465948e-05, "logits/chosen": -2.6372122764587402, "logits/rejected": -2.668998956680298, "logps/chosen": -201.66514587402344, "logps/rejected": -199.07876586914062, "loss": 0.7134, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0678907632827759, "rewards/margins": 0.08854639530181885, "rewards/rejected": -1.1564371585845947, "step": 605 }, { "epoch": 0.79, "learning_rate": 4.370911289205518e-05, "logits/chosen": -2.638387680053711, "logits/rejected": -2.5716373920440674, "logps/chosen": -205.70352172851562, "logps/rejected": -193.94918823242188, "loss": 0.8545, "rewards/accuracies": 0.5, "rewards/chosen": -1.4150516986846924, "rewards/margins": -0.1472320556640625, "rewards/rejected": -1.2678195238113403, "step": 606 }, { "epoch": 0.79, "learning_rate": 4.368532794932854e-05, "logits/chosen": -2.53859543800354, "logits/rejected": -2.731680154800415, "logps/chosen": -164.7539520263672, "logps/rejected": -206.26397705078125, "loss": 0.7565, "rewards/accuracies": 0.5, "rewards/chosen": -1.2516262531280518, "rewards/margins": 0.0020126476883888245, "rewards/rejected": -1.2536388635635376, "step": 607 }, { "epoch": 0.8, "learning_rate": 4.366150462533588e-05, "logits/chosen": -2.729710340499878, "logits/rejected": -2.6896419525146484, "logps/chosen": -191.62673950195312, "logps/rejected": -183.93675231933594, "loss": 0.7964, "rewards/accuracies": 0.4375, "rewards/chosen": -1.2322921752929688, "rewards/margins": -0.07104435563087463, "rewards/rejected": -1.1612478494644165, "step": 608 }, { "epoch": 0.8, "learning_rate": 4.363764296901234e-05, "logits/chosen": -2.5539419651031494, "logits/rejected": -2.580127239227295, "logps/chosen": -188.94125366210938, "logps/rejected": -215.50633239746094, "loss": 0.665, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0811361074447632, "rewards/margins": 0.14258897304534912, "rewards/rejected": -1.2237250804901123, "step": 609 }, { "epoch": 0.8, "learning_rate": 4.361374302937182e-05, "logits/chosen": -2.505577802658081, "logits/rejected": -2.5432798862457275, "logps/chosen": -220.49464416503906, "logps/rejected": -237.48265075683594, "loss": 0.7922, "rewards/accuracies": 0.4375, "rewards/chosen": -1.2271060943603516, "rewards/margins": -0.0697195902466774, "rewards/rejected": -1.1573865413665771, "step": 610 }, { "epoch": 0.8, "learning_rate": 4.358980485550683e-05, "logits/chosen": -2.7005603313446045, "logits/rejected": -2.764317274093628, "logps/chosen": -191.95233154296875, "logps/rejected": -204.19862365722656, "loss": 0.5799, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1303952932357788, "rewards/margins": 0.4044473469257355, "rewards/rejected": -1.5348427295684814, "step": 611 }, { "epoch": 0.8, "learning_rate": 4.356582849658845e-05, "logits/chosen": -2.6627776622772217, "logits/rejected": -2.712620258331299, "logps/chosen": -246.655517578125, "logps/rejected": -261.1457214355469, "loss": 0.737, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0685715675354004, "rewards/margins": 0.14301545917987823, "rewards/rejected": -1.2115869522094727, "step": 612 }, { "epoch": 0.8, "learning_rate": 4.354181400186617e-05, "logits/chosen": -2.617241382598877, "logits/rejected": -2.6537880897521973, "logps/chosen": -179.02081298828125, "logps/rejected": -185.8905792236328, "loss": 0.6874, "rewards/accuracies": 0.5, "rewards/chosen": -1.0464906692504883, "rewards/margins": 0.13806383311748505, "rewards/rejected": -1.1845545768737793, "step": 613 }, { "epoch": 0.8, "learning_rate": 4.351776142066782e-05, "logits/chosen": -2.577658176422119, "logits/rejected": -2.62485933303833, "logps/chosen": -187.30638122558594, "logps/rejected": -213.94476318359375, "loss": 0.6266, "rewards/accuracies": 0.625, "rewards/chosen": -0.8167380094528198, "rewards/margins": 0.23687534034252167, "rewards/rejected": -1.053613305091858, "step": 614 }, { "epoch": 0.8, "learning_rate": 4.349367080239946e-05, "logits/chosen": -2.627676248550415, "logits/rejected": -2.682474136352539, "logps/chosen": -146.130615234375, "logps/rejected": -170.17799377441406, "loss": 0.5688, "rewards/accuracies": 0.625, "rewards/chosen": -0.8195935487747192, "rewards/margins": 0.3726154565811157, "rewards/rejected": -1.1922091245651245, "step": 615 }, { "epoch": 0.81, "learning_rate": 4.34695421965453e-05, "logits/chosen": -2.5791420936584473, "logits/rejected": -2.5302531719207764, "logps/chosen": -175.85426330566406, "logps/rejected": -170.61648559570312, "loss": 0.7291, "rewards/accuracies": 0.4375, "rewards/chosen": -1.085453748703003, "rewards/margins": -0.03215315565466881, "rewards/rejected": -1.0533006191253662, "step": 616 }, { "epoch": 0.81, "learning_rate": 4.344537565266755e-05, "logits/chosen": -2.757708787918091, "logits/rejected": -2.792357921600342, "logps/chosen": -217.09375, "logps/rejected": -204.7152099609375, "loss": 0.6347, "rewards/accuracies": 0.625, "rewards/chosen": -0.9034044742584229, "rewards/margins": 0.4320634603500366, "rewards/rejected": -1.335468053817749, "step": 617 }, { "epoch": 0.81, "learning_rate": 4.342117122040637e-05, "logits/chosen": -2.6127302646636963, "logits/rejected": -2.6188175678253174, "logps/chosen": -166.92210388183594, "logps/rejected": -188.92709350585938, "loss": 0.7474, "rewards/accuracies": 0.5, "rewards/chosen": -1.006317138671875, "rewards/margins": -0.026082061231136322, "rewards/rejected": -0.9802349805831909, "step": 618 }, { "epoch": 0.81, "learning_rate": 4.339692894947974e-05, "logits/chosen": -2.401575803756714, "logits/rejected": -2.467298984527588, "logps/chosen": -161.87429809570312, "logps/rejected": -187.6583251953125, "loss": 0.6542, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7750260829925537, "rewards/margins": 0.2767452895641327, "rewards/rejected": -1.0517714023590088, "step": 619 }, { "epoch": 0.81, "learning_rate": 4.3372648889683364e-05, "logits/chosen": -2.485372543334961, "logits/rejected": -2.494091510772705, "logps/chosen": -135.72772216796875, "logps/rejected": -164.1346435546875, "loss": 0.5291, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5809693336486816, "rewards/margins": 0.3957809805870056, "rewards/rejected": -0.9767501354217529, "step": 620 }, { "epoch": 0.81, "learning_rate": 4.334833109089057e-05, "logits/chosen": -2.7087979316711426, "logits/rejected": -2.6643896102905273, "logps/chosen": -227.6691131591797, "logps/rejected": -246.98077392578125, "loss": 0.6771, "rewards/accuracies": 0.5, "rewards/chosen": -0.9124298095703125, "rewards/margins": 0.15321853756904602, "rewards/rejected": -1.0656484365463257, "step": 621 }, { "epoch": 0.81, "learning_rate": 4.33239756030522e-05, "logits/chosen": -2.490464687347412, "logits/rejected": -2.57242488861084, "logps/chosen": -236.5983428955078, "logps/rejected": -217.5277862548828, "loss": 0.6671, "rewards/accuracies": 0.625, "rewards/chosen": -0.7107295393943787, "rewards/margins": 0.13387471437454224, "rewards/rejected": -0.8446043133735657, "step": 622 }, { "epoch": 0.82, "learning_rate": 4.329958247619651e-05, "logits/chosen": -2.6979587078094482, "logits/rejected": -2.720431089401245, "logps/chosen": -152.257080078125, "logps/rejected": -158.9082489013672, "loss": 0.5986, "rewards/accuracies": 0.6875, "rewards/chosen": -0.735335648059845, "rewards/margins": 0.4063275456428528, "rewards/rejected": -1.1416633129119873, "step": 623 }, { "epoch": 0.82, "learning_rate": 4.3275151760429075e-05, "logits/chosen": -2.494333267211914, "logits/rejected": -2.522716522216797, "logps/chosen": -171.63621520996094, "logps/rejected": -163.959716796875, "loss": 0.6896, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7268607020378113, "rewards/margins": 0.06785643100738525, "rewards/rejected": -0.7947170734405518, "step": 624 }, { "epoch": 0.82, "learning_rate": 4.325068350593268e-05, "logits/chosen": -2.4268102645874023, "logits/rejected": -2.4749088287353516, "logps/chosen": -178.3427734375, "logps/rejected": -186.25009155273438, "loss": 0.6225, "rewards/accuracies": 0.5, "rewards/chosen": -0.5268755555152893, "rewards/margins": 0.207400843501091, "rewards/rejected": -0.7342764139175415, "step": 625 }, { "epoch": 0.82, "learning_rate": 4.322617776296723e-05, "logits/chosen": -2.6472067832946777, "logits/rejected": -2.662853717803955, "logps/chosen": -194.68817138671875, "logps/rejected": -203.64810180664062, "loss": 0.7129, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7313871383666992, "rewards/margins": 0.030208323150873184, "rewards/rejected": -0.7615953683853149, "step": 626 }, { "epoch": 0.82, "learning_rate": 4.320163458186961e-05, "logits/chosen": -2.5693845748901367, "logits/rejected": -2.5331075191497803, "logps/chosen": -169.76651000976562, "logps/rejected": -178.60678100585938, "loss": 0.658, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8332303762435913, "rewards/margins": 0.1909269392490387, "rewards/rejected": -1.0241572856903076, "step": 627 }, { "epoch": 0.82, "learning_rate": 4.317705401305362e-05, "logits/chosen": -2.318687915802002, "logits/rejected": -2.345287322998047, "logps/chosen": -218.33798217773438, "logps/rejected": -217.30389404296875, "loss": 0.6021, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8331419825553894, "rewards/margins": 0.3256155550479889, "rewards/rejected": -1.1587576866149902, "step": 628 }, { "epoch": 0.82, "learning_rate": 4.315243610700986e-05, "logits/chosen": -2.685421943664551, "logits/rejected": -2.777416944503784, "logps/chosen": -217.2689971923828, "logps/rejected": -223.85684204101562, "loss": 0.6324, "rewards/accuracies": 0.75, "rewards/chosen": -1.0812008380889893, "rewards/margins": 0.19831162691116333, "rewards/rejected": -1.2795124053955078, "step": 629 }, { "epoch": 0.82, "learning_rate": 4.312778091430563e-05, "logits/chosen": -2.4046859741210938, "logits/rejected": -2.7304351329803467, "logps/chosen": -157.87550354003906, "logps/rejected": -225.56103515625, "loss": 0.6807, "rewards/accuracies": 0.5, "rewards/chosen": -0.9263030886650085, "rewards/margins": 0.20371997356414795, "rewards/rejected": -1.1300231218338013, "step": 630 }, { "epoch": 0.83, "learning_rate": 4.310308848558479e-05, "logits/chosen": -2.4605460166931152, "logits/rejected": -2.577059030532837, "logps/chosen": -147.3010711669922, "logps/rejected": -183.1886444091797, "loss": 0.5799, "rewards/accuracies": 0.75, "rewards/chosen": -0.6091120839118958, "rewards/margins": 0.33578452467918396, "rewards/rejected": -0.9448965787887573, "step": 631 }, { "epoch": 0.83, "learning_rate": 4.3078358871567706e-05, "logits/chosen": -2.5022783279418945, "logits/rejected": -2.6256165504455566, "logps/chosen": -154.47483825683594, "logps/rejected": -203.1712188720703, "loss": 0.696, "rewards/accuracies": 0.625, "rewards/chosen": -0.8408339023590088, "rewards/margins": 0.0674683153629303, "rewards/rejected": -0.9083021879196167, "step": 632 }, { "epoch": 0.83, "learning_rate": 4.305359212305115e-05, "logits/chosen": -2.570892095565796, "logits/rejected": -2.6330907344818115, "logps/chosen": -152.4366455078125, "logps/rejected": -172.27908325195312, "loss": 0.6883, "rewards/accuracies": 0.5, "rewards/chosen": -0.6601619124412537, "rewards/margins": 0.09768363833427429, "rewards/rejected": -0.7578455209732056, "step": 633 }, { "epoch": 0.83, "learning_rate": 4.302878829090813e-05, "logits/chosen": -2.5576834678649902, "logits/rejected": -2.6245827674865723, "logps/chosen": -217.8241424560547, "logps/rejected": -200.61273193359375, "loss": 0.7646, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9752216339111328, "rewards/margins": -0.06534964591264725, "rewards/rejected": -0.9098719358444214, "step": 634 }, { "epoch": 0.83, "learning_rate": 4.300394742608784e-05, "logits/chosen": -2.4099199771881104, "logits/rejected": -2.4199118614196777, "logps/chosen": -137.7400665283203, "logps/rejected": -167.20249938964844, "loss": 0.6885, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8331056833267212, "rewards/margins": 0.23110516369342804, "rewards/rejected": -1.0642107725143433, "step": 635 }, { "epoch": 0.83, "learning_rate": 4.2979069579615564e-05, "logits/chosen": -2.5262954235076904, "logits/rejected": -2.6143667697906494, "logps/chosen": -173.13040161132812, "logps/rejected": -237.11602783203125, "loss": 0.7272, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6415895819664001, "rewards/margins": 0.039109162986278534, "rewards/rejected": -0.6806987524032593, "step": 636 }, { "epoch": 0.83, "learning_rate": 4.2954154802592514e-05, "logits/chosen": -2.4960150718688965, "logits/rejected": -2.578279733657837, "logps/chosen": -139.1173095703125, "logps/rejected": -168.22586059570312, "loss": 0.6952, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9741197824478149, "rewards/margins": 0.2649148106575012, "rewards/rejected": -1.2390345335006714, "step": 637 }, { "epoch": 0.84, "learning_rate": 4.292920314619578e-05, "logits/chosen": -2.573779821395874, "logits/rejected": -2.5826001167297363, "logps/chosen": -200.8004913330078, "logps/rejected": -207.52462768554688, "loss": 0.7511, "rewards/accuracies": 0.5, "rewards/chosen": -0.8658602833747864, "rewards/margins": -0.03227938339114189, "rewards/rejected": -0.8335809707641602, "step": 638 }, { "epoch": 0.84, "learning_rate": 4.290421466167822e-05, "logits/chosen": -2.225033760070801, "logits/rejected": -2.293822765350342, "logps/chosen": -223.53115844726562, "logps/rejected": -244.39378356933594, "loss": 0.6364, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5871797204017639, "rewards/margins": 0.15462249517440796, "rewards/rejected": -0.7418022155761719, "step": 639 }, { "epoch": 0.84, "learning_rate": 4.2879189400368314e-05, "logits/chosen": -2.5370843410491943, "logits/rejected": -2.6005024909973145, "logps/chosen": -185.48890686035156, "logps/rejected": -200.16712951660156, "loss": 0.6332, "rewards/accuracies": 0.625, "rewards/chosen": -0.8029256463050842, "rewards/margins": 0.19223058223724365, "rewards/rejected": -0.9951562881469727, "step": 640 }, { "epoch": 0.84, "learning_rate": 4.2854127413670096e-05, "logits/chosen": -2.5340185165405273, "logits/rejected": -2.466094732284546, "logps/chosen": -166.66929626464844, "logps/rejected": -157.72235107421875, "loss": 0.9073, "rewards/accuracies": 0.375, "rewards/chosen": -1.1363290548324585, "rewards/margins": -0.30132997035980225, "rewards/rejected": -0.8349990248680115, "step": 641 }, { "epoch": 0.84, "learning_rate": 4.282902875306304e-05, "logits/chosen": -2.4717962741851807, "logits/rejected": -2.5328433513641357, "logps/chosen": -135.24365234375, "logps/rejected": -178.5400390625, "loss": 0.6674, "rewards/accuracies": 0.5, "rewards/chosen": -0.42678678035736084, "rewards/margins": 0.11809446662664413, "rewards/rejected": -0.5448811650276184, "step": 642 }, { "epoch": 0.84, "learning_rate": 4.280389347010194e-05, "logits/chosen": -2.51080584526062, "logits/rejected": -2.6007397174835205, "logps/chosen": -160.39112854003906, "logps/rejected": -168.5418243408203, "loss": 0.7279, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6518469452857971, "rewards/margins": 0.04480691999197006, "rewards/rejected": -0.6966539025306702, "step": 643 }, { "epoch": 0.84, "learning_rate": 4.277872161641682e-05, "logits/chosen": -2.3960509300231934, "logits/rejected": -2.51719331741333, "logps/chosen": -172.503662109375, "logps/rejected": -211.3986053466797, "loss": 0.6125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6145063638687134, "rewards/margins": 0.2776188254356384, "rewards/rejected": -0.892125129699707, "step": 644 }, { "epoch": 0.84, "learning_rate": 4.275351324371283e-05, "logits/chosen": -2.6039271354675293, "logits/rejected": -2.669445037841797, "logps/chosen": -167.97862243652344, "logps/rejected": -215.40682983398438, "loss": 0.5704, "rewards/accuracies": 0.75, "rewards/chosen": -0.5948019027709961, "rewards/margins": 0.3271142840385437, "rewards/rejected": -0.921916127204895, "step": 645 }, { "epoch": 0.85, "learning_rate": 4.2728268403770145e-05, "logits/chosen": -2.222475528717041, "logits/rejected": -2.220109462738037, "logps/chosen": -179.1140594482422, "logps/rejected": -203.96224975585938, "loss": 0.6496, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5808567404747009, "rewards/margins": 0.15382859110832214, "rewards/rejected": -0.7346853017807007, "step": 646 }, { "epoch": 0.85, "learning_rate": 4.270298714844381e-05, "logits/chosen": -2.5251059532165527, "logits/rejected": -2.5589253902435303, "logps/chosen": -173.88092041015625, "logps/rejected": -178.63037109375, "loss": 0.6081, "rewards/accuracies": 0.5, "rewards/chosen": -0.7299823760986328, "rewards/margins": 0.28834477066993713, "rewards/rejected": -1.018327236175537, "step": 647 }, { "epoch": 0.85, "learning_rate": 4.267766952966369e-05, "logits/chosen": -2.281771421432495, "logits/rejected": -2.4591827392578125, "logps/chosen": -148.9844207763672, "logps/rejected": -189.53805541992188, "loss": 0.5192, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5873172879219055, "rewards/margins": 0.48181021213531494, "rewards/rejected": -1.0691275596618652, "step": 648 }, { "epoch": 0.85, "learning_rate": 4.2652315599434354e-05, "logits/chosen": -2.4655325412750244, "logits/rejected": -2.5449090003967285, "logps/chosen": -154.69180297851562, "logps/rejected": -193.77456665039062, "loss": 0.6647, "rewards/accuracies": 0.625, "rewards/chosen": -0.7018711566925049, "rewards/margins": 0.11128158867359161, "rewards/rejected": -0.8131527900695801, "step": 649 }, { "epoch": 0.85, "learning_rate": 4.262692540983496e-05, "logits/chosen": -2.4712820053100586, "logits/rejected": -2.3742597103118896, "logps/chosen": -190.89785766601562, "logps/rejected": -224.13925170898438, "loss": 0.7112, "rewards/accuracies": 0.375, "rewards/chosen": -0.897792398929596, "rewards/margins": 0.05793970078229904, "rewards/rejected": -0.9557321667671204, "step": 650 }, { "epoch": 0.85, "learning_rate": 4.2601499013019126e-05, "logits/chosen": -2.483391284942627, "logits/rejected": -2.4832870960235596, "logps/chosen": -173.18243408203125, "logps/rejected": -187.56520080566406, "loss": 0.7033, "rewards/accuracies": 0.375, "rewards/chosen": -0.6757019758224487, "rewards/margins": 0.04309311881661415, "rewards/rejected": -0.7187950611114502, "step": 651 }, { "epoch": 0.85, "learning_rate": 4.257603646121484e-05, "logits/chosen": -2.5267128944396973, "logits/rejected": -2.5165576934814453, "logps/chosen": -165.43246459960938, "logps/rejected": -212.12628173828125, "loss": 0.6429, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6913148164749146, "rewards/margins": 0.17455829679965973, "rewards/rejected": -0.8658731579780579, "step": 652 }, { "epoch": 0.85, "learning_rate": 4.2550537806724384e-05, "logits/chosen": -2.3866825103759766, "logits/rejected": -2.4320545196533203, "logps/chosen": -175.04782104492188, "logps/rejected": -203.4820556640625, "loss": 0.5674, "rewards/accuracies": 0.625, "rewards/chosen": -0.6151914000511169, "rewards/margins": 0.4012424945831299, "rewards/rejected": -1.0164339542388916, "step": 653 }, { "epoch": 0.86, "learning_rate": 4.2525003101924164e-05, "logits/chosen": -2.5912859439849854, "logits/rejected": -2.576547861099243, "logps/chosen": -219.98446655273438, "logps/rejected": -202.2375946044922, "loss": 0.7016, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9261603355407715, "rewards/margins": 0.16095884144306183, "rewards/rejected": -1.0871191024780273, "step": 654 }, { "epoch": 0.86, "learning_rate": 4.249943239926467e-05, "logits/chosen": -2.648683786392212, "logits/rejected": -2.5860180854797363, "logps/chosen": -175.8250732421875, "logps/rejected": -156.6357879638672, "loss": 0.7841, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6031679511070251, "rewards/margins": -0.12085067480802536, "rewards/rejected": -0.4823172092437744, "step": 655 }, { "epoch": 0.86, "learning_rate": 4.247382575127031e-05, "logits/chosen": -2.474801778793335, "logits/rejected": -2.3887550830841064, "logps/chosen": -198.35394287109375, "logps/rejected": -172.00584411621094, "loss": 0.7847, "rewards/accuracies": 0.4375, "rewards/chosen": -1.0092177391052246, "rewards/margins": -0.09214206784963608, "rewards/rejected": -0.9170756340026855, "step": 656 }, { "epoch": 0.86, "learning_rate": 4.2448183210539334e-05, "logits/chosen": -2.392206907272339, "logits/rejected": -2.489854335784912, "logps/chosen": -174.3745574951172, "logps/rejected": -220.22213745117188, "loss": 0.5326, "rewards/accuracies": 0.5, "rewards/chosen": -0.5829483270645142, "rewards/margins": 0.5717104077339172, "rewards/rejected": -1.1546587944030762, "step": 657 }, { "epoch": 0.86, "learning_rate": 4.2422504829743724e-05, "logits/chosen": -2.61334228515625, "logits/rejected": -2.6003055572509766, "logps/chosen": -213.92164611816406, "logps/rejected": -232.10540771484375, "loss": 0.6787, "rewards/accuracies": 0.625, "rewards/chosen": -0.8696513175964355, "rewards/margins": 0.26525798439979553, "rewards/rejected": -1.1349092721939087, "step": 658 }, { "epoch": 0.86, "learning_rate": 4.239679066162907e-05, "logits/chosen": -2.529151678085327, "logits/rejected": -2.584458351135254, "logps/chosen": -160.20848083496094, "logps/rejected": -203.44912719726562, "loss": 0.6072, "rewards/accuracies": 0.625, "rewards/chosen": -0.5726284980773926, "rewards/margins": 0.27831003069877625, "rewards/rejected": -0.8509385585784912, "step": 659 }, { "epoch": 0.86, "learning_rate": 4.237104075901449e-05, "logits/chosen": -2.543184995651245, "logits/rejected": -2.5258021354675293, "logps/chosen": -180.32992553710938, "logps/rejected": -199.39439392089844, "loss": 0.6992, "rewards/accuracies": 0.5625, "rewards/chosen": -0.811248779296875, "rewards/margins": 0.16956302523612976, "rewards/rejected": -0.9808117747306824, "step": 660 }, { "epoch": 0.87, "learning_rate": 4.234525517479248e-05, "logits/chosen": -2.471468925476074, "logits/rejected": -2.4575130939483643, "logps/chosen": -172.39584350585938, "logps/rejected": -185.70445251464844, "loss": 0.7311, "rewards/accuracies": 0.4375, "rewards/chosen": -0.81290203332901, "rewards/margins": 0.009819276630878448, "rewards/rejected": -0.8227213025093079, "step": 661 }, { "epoch": 0.87, "learning_rate": 4.2319433961928844e-05, "logits/chosen": -2.608738899230957, "logits/rejected": -2.4687836170196533, "logps/chosen": -185.58847045898438, "logps/rejected": -162.63841247558594, "loss": 0.7846, "rewards/accuracies": 0.3125, "rewards/chosen": -0.7829579710960388, "rewards/margins": -0.0719057098031044, "rewards/rejected": -0.7110522985458374, "step": 662 }, { "epoch": 0.87, "learning_rate": 4.229357717346257e-05, "logits/chosen": -2.5522217750549316, "logits/rejected": -2.489596366882324, "logps/chosen": -203.3766326904297, "logps/rejected": -254.9163055419922, "loss": 0.722, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7322261333465576, "rewards/margins": 0.030108261853456497, "rewards/rejected": -0.7623343467712402, "step": 663 }, { "epoch": 0.87, "learning_rate": 4.226768486250572e-05, "logits/chosen": -2.624052047729492, "logits/rejected": -2.6371800899505615, "logps/chosen": -184.49551391601562, "logps/rejected": -204.43121337890625, "loss": 0.7533, "rewards/accuracies": 0.5, "rewards/chosen": -0.8451451659202576, "rewards/margins": -0.06043161079287529, "rewards/rejected": -0.7847135663032532, "step": 664 }, { "epoch": 0.87, "learning_rate": 4.224175708224332e-05, "logits/chosen": -2.5530622005462646, "logits/rejected": -2.638960123062134, "logps/chosen": -176.18704223632812, "logps/rejected": -174.23574829101562, "loss": 0.7083, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5662491321563721, "rewards/margins": 0.15944433212280273, "rewards/rejected": -0.72569340467453, "step": 665 }, { "epoch": 0.87, "learning_rate": 4.221579388593326e-05, "logits/chosen": -2.688939094543457, "logits/rejected": -2.66377854347229, "logps/chosen": -160.33746337890625, "logps/rejected": -172.418212890625, "loss": 0.6331, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5651705265045166, "rewards/margins": 0.20773081481456757, "rewards/rejected": -0.7729012966156006, "step": 666 }, { "epoch": 0.87, "learning_rate": 4.218979532690616e-05, "logits/chosen": -2.484663248062134, "logits/rejected": -2.424830675125122, "logps/chosen": -191.41098022460938, "logps/rejected": -164.51815795898438, "loss": 0.8708, "rewards/accuracies": 0.375, "rewards/chosen": -0.9628737568855286, "rewards/margins": -0.2227509766817093, "rewards/rejected": -0.7401228547096252, "step": 667 }, { "epoch": 0.87, "learning_rate": 4.216376145856529e-05, "logits/chosen": -2.4857797622680664, "logits/rejected": -2.4634532928466797, "logps/chosen": -185.9803009033203, "logps/rejected": -229.830078125, "loss": 0.8148, "rewards/accuracies": 0.5, "rewards/chosen": -0.8997754454612732, "rewards/margins": -0.06498207151889801, "rewards/rejected": -0.8347933888435364, "step": 668 }, { "epoch": 0.88, "learning_rate": 4.213769233438646e-05, "logits/chosen": -2.5399997234344482, "logits/rejected": -2.670645236968994, "logps/chosen": -160.7857666015625, "logps/rejected": -172.67477416992188, "loss": 0.7154, "rewards/accuracies": 0.3125, "rewards/chosen": -0.5905480980873108, "rewards/margins": -0.010487288236618042, "rewards/rejected": -0.5800608396530151, "step": 669 }, { "epoch": 0.88, "learning_rate": 4.211158800791788e-05, "logits/chosen": -2.584531784057617, "logits/rejected": -2.6492509841918945, "logps/chosen": -179.07191467285156, "logps/rejected": -236.59219360351562, "loss": 0.632, "rewards/accuracies": 0.625, "rewards/chosen": -0.8085418343544006, "rewards/margins": 0.2305898368358612, "rewards/rejected": -1.0391316413879395, "step": 670 }, { "epoch": 0.88, "learning_rate": 4.208544853278008e-05, "logits/chosen": -2.7273218631744385, "logits/rejected": -2.674079656600952, "logps/chosen": -194.35113525390625, "logps/rejected": -203.67144775390625, "loss": 0.7744, "rewards/accuracies": 0.75, "rewards/chosen": -0.7947997450828552, "rewards/margins": -0.0895846039056778, "rewards/rejected": -0.7052150964736938, "step": 671 }, { "epoch": 0.88, "learning_rate": 4.205927396266577e-05, "logits/chosen": -2.5353407859802246, "logits/rejected": -2.473275899887085, "logps/chosen": -189.8173828125, "logps/rejected": -172.57937622070312, "loss": 0.7574, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7492716312408447, "rewards/margins": -0.07650469243526459, "rewards/rejected": -0.672766923904419, "step": 672 }, { "epoch": 0.88, "learning_rate": 4.203306435133978e-05, "logits/chosen": -2.4454925060272217, "logits/rejected": -2.436471462249756, "logps/chosen": -142.18865966796875, "logps/rejected": -181.4835968017578, "loss": 0.5801, "rewards/accuracies": 0.625, "rewards/chosen": -0.555526077747345, "rewards/margins": 0.41016635298728943, "rewards/rejected": -0.9656924605369568, "step": 673 }, { "epoch": 0.88, "learning_rate": 4.200681975263888e-05, "logits/chosen": -2.5019893646240234, "logits/rejected": -2.5508639812469482, "logps/chosen": -164.3115997314453, "logps/rejected": -172.49722290039062, "loss": 0.7833, "rewards/accuracies": 0.3125, "rewards/chosen": -0.6514595746994019, "rewards/margins": -0.11908778548240662, "rewards/rejected": -0.5323717594146729, "step": 674 }, { "epoch": 0.88, "learning_rate": 4.1980540220471744e-05, "logits/chosen": -2.5552783012390137, "logits/rejected": -2.547940731048584, "logps/chosen": -187.34030151367188, "logps/rejected": -205.068115234375, "loss": 0.7329, "rewards/accuracies": 0.5, "rewards/chosen": -0.6662761569023132, "rewards/margins": 0.10254745930433273, "rewards/rejected": -0.7688236236572266, "step": 675 }, { "epoch": 0.88, "learning_rate": 4.195422580881878e-05, "logits/chosen": -2.5980396270751953, "logits/rejected": -2.633117437362671, "logps/chosen": -169.28765869140625, "logps/rejected": -175.1426544189453, "loss": 0.783, "rewards/accuracies": 0.4375, "rewards/chosen": -0.738059937953949, "rewards/margins": -0.070980504155159, "rewards/rejected": -0.6670793890953064, "step": 676 }, { "epoch": 0.89, "learning_rate": 4.192787657173204e-05, "logits/chosen": -2.5799622535705566, "logits/rejected": -2.64198637008667, "logps/chosen": -169.6929931640625, "logps/rejected": -194.3636932373047, "loss": 0.5487, "rewards/accuracies": 0.75, "rewards/chosen": -0.3179047703742981, "rewards/margins": 0.39317452907562256, "rewards/rejected": -0.7110792398452759, "step": 677 }, { "epoch": 0.89, "learning_rate": 4.1901492563335115e-05, "logits/chosen": -2.402824878692627, "logits/rejected": -2.479947328567505, "logps/chosen": -168.98193359375, "logps/rejected": -200.73716735839844, "loss": 0.7085, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7477744221687317, "rewards/margins": 0.06644614040851593, "rewards/rejected": -0.8142206072807312, "step": 678 }, { "epoch": 0.89, "learning_rate": 4.187507383782303e-05, "logits/chosen": -2.6026642322540283, "logits/rejected": -2.6655664443969727, "logps/chosen": -158.02281188964844, "logps/rejected": -161.2207489013672, "loss": 0.6549, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7262417078018188, "rewards/margins": 0.15959057211875916, "rewards/rejected": -0.8858322501182556, "step": 679 }, { "epoch": 0.89, "learning_rate": 4.1848620449462115e-05, "logits/chosen": -2.683466911315918, "logits/rejected": -2.7357425689697266, "logps/chosen": -183.96287536621094, "logps/rejected": -185.6376190185547, "loss": 0.6963, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7100127935409546, "rewards/margins": 0.05447208136320114, "rewards/rejected": -0.7644848823547363, "step": 680 }, { "epoch": 0.89, "learning_rate": 4.1822132452589885e-05, "logits/chosen": -2.594881057739258, "logits/rejected": -2.6565728187561035, "logps/chosen": -182.09751892089844, "logps/rejected": -229.0118408203125, "loss": 0.6154, "rewards/accuracies": 0.5625, "rewards/chosen": -0.530844509601593, "rewards/margins": 0.22644518315792084, "rewards/rejected": -0.757289707660675, "step": 681 }, { "epoch": 0.89, "learning_rate": 4.1795609901614966e-05, "logits/chosen": -2.451526641845703, "logits/rejected": -2.444248914718628, "logps/chosen": -159.2547607421875, "logps/rejected": -141.80270385742188, "loss": 0.7846, "rewards/accuracies": 0.3125, "rewards/chosen": -0.6390894651412964, "rewards/margins": -0.018851161003112793, "rewards/rejected": -0.6202382445335388, "step": 682 }, { "epoch": 0.89, "learning_rate": 4.176905285101695e-05, "logits/chosen": -2.7691075801849365, "logits/rejected": -2.7721362113952637, "logps/chosen": -177.64317321777344, "logps/rejected": -199.67703247070312, "loss": 0.6667, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5808727145195007, "rewards/margins": 0.10210245847702026, "rewards/rejected": -0.682975172996521, "step": 683 }, { "epoch": 0.9, "learning_rate": 4.17424613553463e-05, "logits/chosen": -2.6382815837860107, "logits/rejected": -2.6818408966064453, "logps/chosen": -199.1725616455078, "logps/rejected": -222.76007080078125, "loss": 0.7032, "rewards/accuracies": 0.625, "rewards/chosen": -0.7650430202484131, "rewards/margins": 0.04612483084201813, "rewards/rejected": -0.8111678957939148, "step": 684 }, { "epoch": 0.9, "learning_rate": 4.171583546922423e-05, "logits/chosen": -2.702162504196167, "logits/rejected": -2.6568708419799805, "logps/chosen": -196.53262329101562, "logps/rejected": -185.51022338867188, "loss": 0.7123, "rewards/accuracies": 0.5, "rewards/chosen": -0.7624577283859253, "rewards/margins": 0.01695454865694046, "rewards/rejected": -0.7794123291969299, "step": 685 }, { "epoch": 0.9, "learning_rate": 4.1689175247342584e-05, "logits/chosen": -2.6585752964019775, "logits/rejected": -2.6265594959259033, "logps/chosen": -184.20936584472656, "logps/rejected": -177.85145568847656, "loss": 0.8148, "rewards/accuracies": 0.5, "rewards/chosen": -0.6442875266075134, "rewards/margins": -0.16329364478588104, "rewards/rejected": -0.48099392652511597, "step": 686 }, { "epoch": 0.9, "learning_rate": 4.1662480744463744e-05, "logits/chosen": -2.4449877738952637, "logits/rejected": -2.6396286487579346, "logps/chosen": -160.87420654296875, "logps/rejected": -172.5674285888672, "loss": 0.6514, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6140396595001221, "rewards/margins": 0.16129107773303986, "rewards/rejected": -0.7753307223320007, "step": 687 }, { "epoch": 0.9, "learning_rate": 4.163575201542052e-05, "logits/chosen": -2.6318554878234863, "logits/rejected": -2.7358765602111816, "logps/chosen": -158.69976806640625, "logps/rejected": -195.73008728027344, "loss": 0.5346, "rewards/accuracies": 0.75, "rewards/chosen": -0.351512610912323, "rewards/margins": 0.4037948548793793, "rewards/rejected": -0.7553074955940247, "step": 688 }, { "epoch": 0.9, "learning_rate": 4.1608989115116e-05, "logits/chosen": -2.5840697288513184, "logits/rejected": -2.585153579711914, "logps/chosen": -170.15512084960938, "logps/rejected": -178.3724365234375, "loss": 0.7501, "rewards/accuracies": 0.625, "rewards/chosen": -0.7801914811134338, "rewards/margins": -0.008921336382627487, "rewards/rejected": -0.7712701559066772, "step": 689 }, { "epoch": 0.9, "learning_rate": 4.158219209852349e-05, "logits/chosen": -2.6560463905334473, "logits/rejected": -2.6988308429718018, "logps/chosen": -175.9833526611328, "logps/rejected": -184.16749572753906, "loss": 0.6545, "rewards/accuracies": 0.5, "rewards/chosen": -0.49359026551246643, "rewards/margins": 0.11187607795000076, "rewards/rejected": -0.605466365814209, "step": 690 }, { "epoch": 0.9, "learning_rate": 4.155536102068636e-05, "logits/chosen": -2.5828936100006104, "logits/rejected": -2.6212708950042725, "logps/chosen": -181.11277770996094, "logps/rejected": -201.1419219970703, "loss": 0.8704, "rewards/accuracies": 0.25, "rewards/chosen": -0.7604485750198364, "rewards/margins": -0.27131497859954834, "rewards/rejected": -0.4891335368156433, "step": 691 }, { "epoch": 0.91, "learning_rate": 4.152849593671793e-05, "logits/chosen": -2.716947078704834, "logits/rejected": -2.65468168258667, "logps/chosen": -193.97682189941406, "logps/rejected": -207.4134521484375, "loss": 0.7631, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6618382930755615, "rewards/margins": -0.0877380520105362, "rewards/rejected": -0.5741002559661865, "step": 692 }, { "epoch": 0.91, "learning_rate": 4.1501596901801384e-05, "logits/chosen": -2.557304620742798, "logits/rejected": -2.6196579933166504, "logps/chosen": -181.52548217773438, "logps/rejected": -195.8028564453125, "loss": 0.7537, "rewards/accuracies": 0.625, "rewards/chosen": -0.6008011698722839, "rewards/margins": -0.043041955679655075, "rewards/rejected": -0.5577592253684998, "step": 693 }, { "epoch": 0.91, "learning_rate": 4.147466397118968e-05, "logits/chosen": -2.567335605621338, "logits/rejected": -2.617478609085083, "logps/chosen": -179.599853515625, "logps/rejected": -198.3054656982422, "loss": 0.6778, "rewards/accuracies": 0.5, "rewards/chosen": -0.8331121802330017, "rewards/margins": 0.06144791468977928, "rewards/rejected": -0.8945600986480713, "step": 694 }, { "epoch": 0.91, "learning_rate": 4.144769720020533e-05, "logits/chosen": -2.576467990875244, "logits/rejected": -2.8038601875305176, "logps/chosen": -174.2159423828125, "logps/rejected": -233.60653686523438, "loss": 0.6067, "rewards/accuracies": 0.75, "rewards/chosen": -0.6737346649169922, "rewards/margins": 0.23855170607566833, "rewards/rejected": -0.9122863411903381, "step": 695 }, { "epoch": 0.91, "learning_rate": 4.142069664424041e-05, "logits/chosen": -2.678194284439087, "logits/rejected": -2.6755244731903076, "logps/chosen": -198.93954467773438, "logps/rejected": -184.6238555908203, "loss": 0.723, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5593891143798828, "rewards/margins": 0.06559957563877106, "rewards/rejected": -0.6249886751174927, "step": 696 }, { "epoch": 0.91, "learning_rate": 4.139366235875637e-05, "logits/chosen": -2.5291852951049805, "logits/rejected": -2.5268821716308594, "logps/chosen": -193.07666015625, "logps/rejected": -176.4041290283203, "loss": 0.8631, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8501958250999451, "rewards/margins": -0.2449750304222107, "rewards/rejected": -0.6052207946777344, "step": 697 }, { "epoch": 0.91, "learning_rate": 4.136659439928397e-05, "logits/chosen": -2.7552402019500732, "logits/rejected": -2.730792760848999, "logps/chosen": -154.31442260742188, "logps/rejected": -201.8627166748047, "loss": 0.635, "rewards/accuracies": 0.5, "rewards/chosen": -0.6791579723358154, "rewards/margins": 0.18784941732883453, "rewards/rejected": -0.867007315158844, "step": 698 }, { "epoch": 0.91, "learning_rate": 4.13394928214231e-05, "logits/chosen": -2.8812167644500732, "logits/rejected": -2.929877996444702, "logps/chosen": -226.31317138671875, "logps/rejected": -223.9871826171875, "loss": 0.7274, "rewards/accuracies": 0.5, "rewards/chosen": -0.759143590927124, "rewards/margins": 0.025182515382766724, "rewards/rejected": -0.7843260765075684, "step": 699 }, { "epoch": 0.92, "learning_rate": 4.1312357680842735e-05, "logits/chosen": -2.7592363357543945, "logits/rejected": -2.787437915802002, "logps/chosen": -176.06568908691406, "logps/rejected": -198.5147705078125, "loss": 0.7985, "rewards/accuracies": 0.3125, "rewards/chosen": -0.6221618056297302, "rewards/margins": -0.08630318939685822, "rewards/rejected": -0.5358585715293884, "step": 700 }, { "epoch": 0.92, "learning_rate": 4.128518903328078e-05, "logits/chosen": -2.5527689456939697, "logits/rejected": -2.569162368774414, "logps/chosen": -168.37327575683594, "logps/rejected": -153.46510314941406, "loss": 0.7245, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7768934369087219, "rewards/margins": -0.018674585968255997, "rewards/rejected": -0.7582188844680786, "step": 701 }, { "epoch": 0.92, "learning_rate": 4.125798693454396e-05, "logits/chosen": -2.6978816986083984, "logits/rejected": -2.775778293609619, "logps/chosen": -181.85899353027344, "logps/rejected": -186.74415588378906, "loss": 0.6015, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6942383050918579, "rewards/margins": 0.28554674983024597, "rewards/rejected": -0.9797850847244263, "step": 702 }, { "epoch": 0.92, "learning_rate": 4.123075144050772e-05, "logits/chosen": -2.420675754547119, "logits/rejected": -2.4404735565185547, "logps/chosen": -187.6428985595703, "logps/rejected": -196.76223754882812, "loss": 0.6309, "rewards/accuracies": 0.625, "rewards/chosen": -0.48836079239845276, "rewards/margins": 0.17586764693260193, "rewards/rejected": -0.6642284393310547, "step": 703 }, { "epoch": 0.92, "learning_rate": 4.120348260711611e-05, "logits/chosen": -2.696930170059204, "logits/rejected": -2.7356936931610107, "logps/chosen": -224.2950897216797, "logps/rejected": -201.0372314453125, "loss": 0.6719, "rewards/accuracies": 0.5, "rewards/chosen": -0.5922297239303589, "rewards/margins": 0.0864262580871582, "rewards/rejected": -0.6786559224128723, "step": 704 }, { "epoch": 0.92, "learning_rate": 4.117618049038165e-05, "logits/chosen": -2.433835029602051, "logits/rejected": -2.5175817012786865, "logps/chosen": -205.09742736816406, "logps/rejected": -253.45188903808594, "loss": 0.6352, "rewards/accuracies": 0.75, "rewards/chosen": -0.5550520420074463, "rewards/margins": 0.19067110121250153, "rewards/rejected": -0.7457231879234314, "step": 705 }, { "epoch": 0.92, "learning_rate": 4.1148845146385214e-05, "logits/chosen": -2.7056033611297607, "logits/rejected": -2.6854336261749268, "logps/chosen": -176.9462127685547, "logps/rejected": -163.0907745361328, "loss": 0.7122, "rewards/accuracies": 0.5, "rewards/chosen": -0.585013747215271, "rewards/margins": 0.12466025352478027, "rewards/rejected": -0.7096740007400513, "step": 706 }, { "epoch": 0.93, "learning_rate": 4.112147663127596e-05, "logits/chosen": -2.5832924842834473, "logits/rejected": -2.6641876697540283, "logps/chosen": -291.26531982421875, "logps/rejected": -285.40997314453125, "loss": 0.8873, "rewards/accuracies": 0.375, "rewards/chosen": -0.9817371368408203, "rewards/margins": -0.22740018367767334, "rewards/rejected": -0.754336953163147, "step": 707 }, { "epoch": 0.93, "learning_rate": 4.109407500127116e-05, "logits/chosen": -2.7471392154693604, "logits/rejected": -2.6390812397003174, "logps/chosen": -175.82046508789062, "logps/rejected": -154.3852081298828, "loss": 0.7906, "rewards/accuracies": 0.375, "rewards/chosen": -0.8870856165885925, "rewards/margins": -0.14796000719070435, "rewards/rejected": -0.7391257286071777, "step": 708 }, { "epoch": 0.93, "learning_rate": 4.106664031265611e-05, "logits/chosen": -2.671312093734741, "logits/rejected": -2.690074920654297, "logps/chosen": -165.29336547851562, "logps/rejected": -158.92807006835938, "loss": 0.6831, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6163545846939087, "rewards/margins": 0.10010550171136856, "rewards/rejected": -0.716460108757019, "step": 709 }, { "epoch": 0.93, "learning_rate": 4.103917262178402e-05, "logits/chosen": -2.6318283081054688, "logits/rejected": -2.65425968170166, "logps/chosen": -161.3974609375, "logps/rejected": -159.72401428222656, "loss": 0.674, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6365061402320862, "rewards/margins": 0.0781688243150711, "rewards/rejected": -0.7146750688552856, "step": 710 }, { "epoch": 0.93, "learning_rate": 4.1011671985075865e-05, "logits/chosen": -2.6295552253723145, "logits/rejected": -2.5962119102478027, "logps/chosen": -183.000732421875, "logps/rejected": -172.703125, "loss": 0.7224, "rewards/accuracies": 0.25, "rewards/chosen": -0.6528486013412476, "rewards/margins": -0.025449033826589584, "rewards/rejected": -0.6273995041847229, "step": 711 }, { "epoch": 0.93, "learning_rate": 4.098413845902033e-05, "logits/chosen": -2.6849284172058105, "logits/rejected": -2.7957422733306885, "logps/chosen": -182.01235961914062, "logps/rejected": -198.17079162597656, "loss": 0.6462, "rewards/accuracies": 0.75, "rewards/chosen": -0.577471911907196, "rewards/margins": 0.14899027347564697, "rewards/rejected": -0.7264621257781982, "step": 712 }, { "epoch": 0.93, "learning_rate": 4.095657210017364e-05, "logits/chosen": -2.5923876762390137, "logits/rejected": -2.6000397205352783, "logps/chosen": -200.19229125976562, "logps/rejected": -190.99234008789062, "loss": 0.7024, "rewards/accuracies": 0.625, "rewards/chosen": -0.5822942852973938, "rewards/margins": 0.05264808610081673, "rewards/rejected": -0.6349424123764038, "step": 713 }, { "epoch": 0.93, "learning_rate": 4.092897296515944e-05, "logits/chosen": -2.501671552658081, "logits/rejected": -2.496816635131836, "logps/chosen": -186.364990234375, "logps/rejected": -185.56118774414062, "loss": 0.7653, "rewards/accuracies": 0.375, "rewards/chosen": -0.7025803327560425, "rewards/margins": -0.02450786530971527, "rewards/rejected": -0.6780725121498108, "step": 714 }, { "epoch": 0.94, "learning_rate": 4.090134111066874e-05, "logits/chosen": -2.688492774963379, "logits/rejected": -2.6672098636627197, "logps/chosen": -168.45094299316406, "logps/rejected": -199.4703826904297, "loss": 0.6693, "rewards/accuracies": 0.5, "rewards/chosen": -0.6713076233863831, "rewards/margins": 0.13783812522888184, "rewards/rejected": -0.8091457486152649, "step": 715 }, { "epoch": 0.94, "learning_rate": 4.0873676593459725e-05, "logits/chosen": -2.4400930404663086, "logits/rejected": -2.5282256603240967, "logps/chosen": -153.5916748046875, "logps/rejected": -178.5830841064453, "loss": 0.6766, "rewards/accuracies": 0.5, "rewards/chosen": -0.4613954722881317, "rewards/margins": 0.07974334061145782, "rewards/rejected": -0.541138768196106, "step": 716 }, { "epoch": 0.94, "learning_rate": 4.08459794703577e-05, "logits/chosen": -2.6582794189453125, "logits/rejected": -2.626995086669922, "logps/chosen": -208.72027587890625, "logps/rejected": -197.92471313476562, "loss": 0.7412, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6259132027626038, "rewards/margins": -0.06286411732435226, "rewards/rejected": -0.5630490779876709, "step": 717 }, { "epoch": 0.94, "learning_rate": 4.081824979825492e-05, "logits/chosen": -2.6991827487945557, "logits/rejected": -2.7381110191345215, "logps/chosen": -182.42140197753906, "logps/rejected": -178.61044311523438, "loss": 0.6404, "rewards/accuracies": 0.5, "rewards/chosen": -0.6173000931739807, "rewards/margins": 0.1515488624572754, "rewards/rejected": -0.7688489556312561, "step": 718 }, { "epoch": 0.94, "learning_rate": 4.07904876341105e-05, "logits/chosen": -2.5119433403015137, "logits/rejected": -2.567487955093384, "logps/chosen": -191.7221221923828, "logps/rejected": -211.65084838867188, "loss": 0.6315, "rewards/accuracies": 0.625, "rewards/chosen": -0.7405194640159607, "rewards/margins": 0.18211159110069275, "rewards/rejected": -0.922631025314331, "step": 719 }, { "epoch": 0.94, "learning_rate": 4.076269303495033e-05, "logits/chosen": -2.662774085998535, "logits/rejected": -2.568493366241455, "logps/chosen": -183.8392791748047, "logps/rejected": -171.7985076904297, "loss": 0.8173, "rewards/accuracies": 0.3125, "rewards/chosen": -0.6661974787712097, "rewards/margins": -0.2225557267665863, "rewards/rejected": -0.4436417520046234, "step": 720 }, { "epoch": 0.94, "learning_rate": 4.073486605786689e-05, "logits/chosen": -2.7839298248291016, "logits/rejected": -2.884078025817871, "logps/chosen": -219.7886962890625, "logps/rejected": -244.15625, "loss": 0.6198, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7247489094734192, "rewards/margins": 0.2933644950389862, "rewards/rejected": -1.018113374710083, "step": 721 }, { "epoch": 0.95, "learning_rate": 4.0707006760019175e-05, "logits/chosen": -2.6340246200561523, "logits/rejected": -2.6862027645111084, "logps/chosen": -178.46224975585938, "logps/rejected": -213.61317443847656, "loss": 0.6549, "rewards/accuracies": 0.5, "rewards/chosen": -0.6767200231552124, "rewards/margins": 0.19127850234508514, "rewards/rejected": -0.8679986000061035, "step": 722 }, { "epoch": 0.95, "learning_rate": 4.067911519863257e-05, "logits/chosen": -2.531005620956421, "logits/rejected": -2.562150001525879, "logps/chosen": -194.95269775390625, "logps/rejected": -199.6376953125, "loss": 0.6151, "rewards/accuracies": 0.75, "rewards/chosen": -0.5694127082824707, "rewards/margins": 0.24292032420635223, "rewards/rejected": -0.8123329877853394, "step": 723 }, { "epoch": 0.95, "learning_rate": 4.065119143099874e-05, "logits/chosen": -2.714223861694336, "logits/rejected": -2.7573318481445312, "logps/chosen": -191.0901641845703, "logps/rejected": -251.58689880371094, "loss": 0.5495, "rewards/accuracies": 0.8125, "rewards/chosen": -0.43053001165390015, "rewards/margins": 0.36911848187446594, "rewards/rejected": -0.7996485233306885, "step": 724 }, { "epoch": 0.95, "learning_rate": 4.062323551447549e-05, "logits/chosen": -2.6741175651550293, "logits/rejected": -2.6618902683258057, "logps/chosen": -181.743896484375, "logps/rejected": -191.43267822265625, "loss": 0.6072, "rewards/accuracies": 0.75, "rewards/chosen": -0.5965117812156677, "rewards/margins": 0.20273733139038086, "rewards/rejected": -0.7992490530014038, "step": 725 }, { "epoch": 0.95, "learning_rate": 4.059524750648668e-05, "logits/chosen": -2.6173298358917236, "logits/rejected": -2.719005584716797, "logps/chosen": -128.7156982421875, "logps/rejected": -163.35720825195312, "loss": 0.6558, "rewards/accuracies": 0.5, "rewards/chosen": -0.4427485764026642, "rewards/margins": 0.11390358209609985, "rewards/rejected": -0.5566521883010864, "step": 726 }, { "epoch": 0.95, "learning_rate": 4.056722746452207e-05, "logits/chosen": -2.7046196460723877, "logits/rejected": -2.626006603240967, "logps/chosen": -192.15426635742188, "logps/rejected": -203.4480743408203, "loss": 0.8411, "rewards/accuracies": 0.3125, "rewards/chosen": -0.9564641714096069, "rewards/margins": -0.2163528949022293, "rewards/rejected": -0.740111231803894, "step": 727 }, { "epoch": 0.95, "learning_rate": 4.053917544613723e-05, "logits/chosen": -2.6192758083343506, "logits/rejected": -2.649017333984375, "logps/chosen": -175.56922912597656, "logps/rejected": -234.2914276123047, "loss": 0.7219, "rewards/accuracies": 0.375, "rewards/chosen": -0.8219558000564575, "rewards/margins": 0.023496918380260468, "rewards/rejected": -0.8454526662826538, "step": 728 }, { "epoch": 0.95, "learning_rate": 4.051109150895343e-05, "logits/chosen": -2.610642671585083, "logits/rejected": -2.615621566772461, "logps/chosen": -195.70941162109375, "logps/rejected": -194.20928955078125, "loss": 0.7166, "rewards/accuracies": 0.5, "rewards/chosen": -0.6751452088356018, "rewards/margins": 0.013915710151195526, "rewards/rejected": -0.6890608668327332, "step": 729 }, { "epoch": 0.96, "learning_rate": 4.0482975710657455e-05, "logits/chosen": -2.456711530685425, "logits/rejected": -2.5238044261932373, "logps/chosen": -199.232666015625, "logps/rejected": -211.0916748046875, "loss": 0.6943, "rewards/accuracies": 0.5, "rewards/chosen": -0.7001582980155945, "rewards/margins": 0.0675555020570755, "rewards/rejected": -0.7677137851715088, "step": 730 }, { "epoch": 0.96, "learning_rate": 4.045482810900159e-05, "logits/chosen": -2.4926917552948, "logits/rejected": -2.4629762172698975, "logps/chosen": -201.10394287109375, "logps/rejected": -212.8970947265625, "loss": 0.6293, "rewards/accuracies": 0.625, "rewards/chosen": -0.5266129970550537, "rewards/margins": 0.1588543802499771, "rewards/rejected": -0.6854673624038696, "step": 731 }, { "epoch": 0.96, "learning_rate": 4.042664876180341e-05, "logits/chosen": -2.6272189617156982, "logits/rejected": -2.6395418643951416, "logps/chosen": -173.2360076904297, "logps/rejected": -186.01959228515625, "loss": 0.6232, "rewards/accuracies": 0.625, "rewards/chosen": -0.5731245875358582, "rewards/margins": 0.21896684169769287, "rewards/rejected": -0.7920913696289062, "step": 732 }, { "epoch": 0.96, "learning_rate": 4.0398437726945716e-05, "logits/chosen": -2.711691379547119, "logits/rejected": -2.7025833129882812, "logps/chosen": -189.71885681152344, "logps/rejected": -210.70761108398438, "loss": 0.7556, "rewards/accuracies": 0.3125, "rewards/chosen": -0.6554818153381348, "rewards/margins": -0.10323198139667511, "rewards/rejected": -0.5522497892379761, "step": 733 }, { "epoch": 0.96, "learning_rate": 4.037019506237638e-05, "logits/chosen": -2.603912830352783, "logits/rejected": -2.686739683151245, "logps/chosen": -186.6495819091797, "logps/rejected": -210.13623046875, "loss": 0.623, "rewards/accuracies": 0.625, "rewards/chosen": -0.7819252610206604, "rewards/margins": 0.26127052307128906, "rewards/rejected": -1.0431957244873047, "step": 734 }, { "epoch": 0.96, "learning_rate": 4.034192082610828e-05, "logits/chosen": -2.6118476390838623, "logits/rejected": -2.5994834899902344, "logps/chosen": -164.40380859375, "logps/rejected": -141.6078643798828, "loss": 0.6502, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5243350863456726, "rewards/margins": 0.12014760076999664, "rewards/rejected": -0.6444827318191528, "step": 735 }, { "epoch": 0.96, "learning_rate": 4.031361507621911e-05, "logits/chosen": -2.501145362854004, "logits/rejected": -2.5378122329711914, "logps/chosen": -204.41793823242188, "logps/rejected": -225.99737548828125, "loss": 0.6733, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8046965003013611, "rewards/margins": 0.13057535886764526, "rewards/rejected": -0.9352718591690063, "step": 736 }, { "epoch": 0.96, "learning_rate": 4.02852778708513e-05, "logits/chosen": -2.7680859565734863, "logits/rejected": -2.828195333480835, "logps/chosen": -221.7406768798828, "logps/rejected": -239.29904174804688, "loss": 0.6113, "rewards/accuracies": 0.75, "rewards/chosen": -0.9672201871871948, "rewards/margins": 0.20203211903572083, "rewards/rejected": -1.1692522764205933, "step": 737 }, { "epoch": 0.97, "learning_rate": 4.0256909268211914e-05, "logits/chosen": -2.7258799076080322, "logits/rejected": -2.7185235023498535, "logps/chosen": -152.56524658203125, "logps/rejected": -151.9055633544922, "loss": 0.7743, "rewards/accuracies": 0.5, "rewards/chosen": -0.7225962281227112, "rewards/margins": -0.00214192271232605, "rewards/rejected": -0.7204542756080627, "step": 738 }, { "epoch": 0.97, "learning_rate": 4.0228509326572496e-05, "logits/chosen": -2.5518906116485596, "logits/rejected": -2.5055699348449707, "logps/chosen": -212.88555908203125, "logps/rejected": -177.05625915527344, "loss": 0.6684, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5254353284835815, "rewards/margins": 0.1856461465358734, "rewards/rejected": -0.7110814452171326, "step": 739 }, { "epoch": 0.97, "learning_rate": 4.0200078104268944e-05, "logits/chosen": -2.642047166824341, "logits/rejected": -2.56471848487854, "logps/chosen": -159.68580627441406, "logps/rejected": -175.0918731689453, "loss": 0.7278, "rewards/accuracies": 0.5, "rewards/chosen": -0.7405662536621094, "rewards/margins": -0.027810221537947655, "rewards/rejected": -0.7127560377120972, "step": 740 }, { "epoch": 0.97, "learning_rate": 4.017161565970144e-05, "logits/chosen": -2.485898971557617, "logits/rejected": -2.4769692420959473, "logps/chosen": -139.57974243164062, "logps/rejected": -172.51040649414062, "loss": 0.7386, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7599843144416809, "rewards/margins": -0.04223020374774933, "rewards/rejected": -0.717754065990448, "step": 741 }, { "epoch": 0.97, "learning_rate": 4.014312205133428e-05, "logits/chosen": -2.585129737854004, "logits/rejected": -2.5427184104919434, "logps/chosen": -200.5863494873047, "logps/rejected": -221.85003662109375, "loss": 0.7925, "rewards/accuracies": 0.375, "rewards/chosen": -0.8390079736709595, "rewards/margins": -0.11233790963888168, "rewards/rejected": -0.7266700863838196, "step": 742 }, { "epoch": 0.97, "learning_rate": 4.011459733769579e-05, "logits/chosen": -2.524702548980713, "logits/rejected": -2.5253896713256836, "logps/chosen": -183.24224853515625, "logps/rejected": -185.02462768554688, "loss": 0.8256, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8384305238723755, "rewards/margins": -0.15028908848762512, "rewards/rejected": -0.6881413459777832, "step": 743 }, { "epoch": 0.97, "learning_rate": 4.0086041577378166e-05, "logits/chosen": -2.4046497344970703, "logits/rejected": -2.4749138355255127, "logps/chosen": -178.8353729248047, "logps/rejected": -187.75807189941406, "loss": 0.7012, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6531072854995728, "rewards/margins": 0.04634825140237808, "rewards/rejected": -0.6994554996490479, "step": 744 }, { "epoch": 0.98, "learning_rate": 4.005745482903739e-05, "logits/chosen": -2.347320795059204, "logits/rejected": -2.4178340435028076, "logps/chosen": -176.78187561035156, "logps/rejected": -216.67189025878906, "loss": 0.6512, "rewards/accuracies": 0.625, "rewards/chosen": -0.8543468713760376, "rewards/margins": 0.18746432662010193, "rewards/rejected": -1.0418111085891724, "step": 745 }, { "epoch": 0.98, "learning_rate": 4.002883715139309e-05, "logits/chosen": -2.698127269744873, "logits/rejected": -2.731611967086792, "logps/chosen": -199.87789916992188, "logps/rejected": -235.65176391601562, "loss": 0.6355, "rewards/accuracies": 0.625, "rewards/chosen": -0.751441240310669, "rewards/margins": 0.19296592473983765, "rewards/rejected": -0.944407045841217, "step": 746 }, { "epoch": 0.98, "learning_rate": 4.000018860322845e-05, "logits/chosen": -2.8345818519592285, "logits/rejected": -2.795546531677246, "logps/chosen": -204.96926879882812, "logps/rejected": -216.8634490966797, "loss": 0.6564, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6679666638374329, "rewards/margins": 0.1266886293888092, "rewards/rejected": -0.7946553230285645, "step": 747 }, { "epoch": 0.98, "learning_rate": 3.9971509243390025e-05, "logits/chosen": -2.5881407260894775, "logits/rejected": -2.6218934059143066, "logps/chosen": -189.1737060546875, "logps/rejected": -197.3935546875, "loss": 0.7232, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7519857883453369, "rewards/margins": -0.0216445904225111, "rewards/rejected": -0.7303412556648254, "step": 748 }, { "epoch": 0.98, "learning_rate": 3.99427991307877e-05, "logits/chosen": -2.5421721935272217, "logits/rejected": -2.579972743988037, "logps/chosen": -193.97085571289062, "logps/rejected": -230.16653442382812, "loss": 0.705, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8407448530197144, "rewards/margins": 0.10392985492944717, "rewards/rejected": -0.9446746706962585, "step": 749 }, { "epoch": 0.98, "learning_rate": 3.9914058324394486e-05, "logits/chosen": -2.632858991622925, "logits/rejected": -2.6479578018188477, "logps/chosen": -201.80633544921875, "logps/rejected": -199.26773071289062, "loss": 0.5815, "rewards/accuracies": 0.75, "rewards/chosen": -0.5169165134429932, "rewards/margins": 0.283170223236084, "rewards/rejected": -0.8000867366790771, "step": 750 }, { "epoch": 0.98, "learning_rate": 3.9885286883246476e-05, "logits/chosen": -2.655200481414795, "logits/rejected": -2.6914384365081787, "logps/chosen": -178.44381713867188, "logps/rejected": -149.90350341796875, "loss": 0.7038, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6623535752296448, "rewards/margins": 0.008262221701443195, "rewards/rejected": -0.6706157326698303, "step": 751 }, { "epoch": 0.98, "learning_rate": 3.985648486644267e-05, "logits/chosen": -2.3822569847106934, "logits/rejected": -2.3234899044036865, "logps/chosen": -181.5887451171875, "logps/rejected": -169.651123046875, "loss": 0.7508, "rewards/accuracies": 0.3125, "rewards/chosen": -0.7002227902412415, "rewards/margins": -0.04468606412410736, "rewards/rejected": -0.6555367708206177, "step": 752 }, { "epoch": 0.99, "learning_rate": 3.982765233314489e-05, "logits/chosen": -2.6348283290863037, "logits/rejected": -2.7363362312316895, "logps/chosen": -175.08506774902344, "logps/rejected": -196.49099731445312, "loss": 0.6209, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5249355435371399, "rewards/margins": 0.2879376709461212, "rewards/rejected": -0.812873125076294, "step": 753 }, { "epoch": 0.99, "learning_rate": 3.979878934257762e-05, "logits/chosen": -2.608914375305176, "logits/rejected": -2.7260642051696777, "logps/chosen": -182.08633422851562, "logps/rejected": -173.453857421875, "loss": 0.6227, "rewards/accuracies": 0.625, "rewards/chosen": -0.4596517086029053, "rewards/margins": 0.3150676190853119, "rewards/rejected": -0.7747193574905396, "step": 754 }, { "epoch": 0.99, "learning_rate": 3.976989595402793e-05, "logits/chosen": -2.684398651123047, "logits/rejected": -2.7180964946746826, "logps/chosen": -186.74403381347656, "logps/rejected": -203.94711303710938, "loss": 0.7122, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6536604762077332, "rewards/margins": 0.010378487408161163, "rewards/rejected": -0.6640389561653137, "step": 755 }, { "epoch": 0.99, "learning_rate": 3.974097222684532e-05, "logits/chosen": -2.380387544631958, "logits/rejected": -2.638767719268799, "logps/chosen": -143.05783081054688, "logps/rejected": -187.193359375, "loss": 0.7379, "rewards/accuracies": 0.375, "rewards/chosen": -0.7300621271133423, "rewards/margins": 0.04467529058456421, "rewards/rejected": -0.7747373580932617, "step": 756 }, { "epoch": 0.99, "learning_rate": 3.9712018220441596e-05, "logits/chosen": -2.4832100868225098, "logits/rejected": -2.588329792022705, "logps/chosen": -214.15151977539062, "logps/rejected": -225.47116088867188, "loss": 0.6362, "rewards/accuracies": 0.6875, "rewards/chosen": -0.670771598815918, "rewards/margins": 0.2148047238588333, "rewards/rejected": -0.8855763673782349, "step": 757 }, { "epoch": 0.99, "learning_rate": 3.9683033994290767e-05, "logits/chosen": -2.648101806640625, "logits/rejected": -2.6975598335266113, "logps/chosen": -163.26866149902344, "logps/rejected": -165.8538055419922, "loss": 0.6858, "rewards/accuracies": 0.5625, "rewards/chosen": -0.622085690498352, "rewards/margins": 0.08007562160491943, "rewards/rejected": -0.7021613717079163, "step": 758 }, { "epoch": 0.99, "learning_rate": 3.965401960792894e-05, "logits/chosen": -2.549471139907837, "logits/rejected": -2.6215145587921143, "logps/chosen": -155.6943359375, "logps/rejected": -152.1805877685547, "loss": 0.7184, "rewards/accuracies": 0.4375, "rewards/chosen": -0.651746392250061, "rewards/margins": -0.030698398128151894, "rewards/rejected": -0.6210479736328125, "step": 759 }, { "epoch": 0.99, "learning_rate": 3.962497512095412e-05, "logits/chosen": -2.599458932876587, "logits/rejected": -2.577223062515259, "logps/chosen": -168.08985900878906, "logps/rejected": -152.3730926513672, "loss": 0.7308, "rewards/accuracies": 0.375, "rewards/chosen": -0.6834767460823059, "rewards/margins": -0.05063985288143158, "rewards/rejected": -0.6328368186950684, "step": 760 }, { "epoch": 1.0, "learning_rate": 3.95959005930262e-05, "logits/chosen": -2.6772971153259277, "logits/rejected": -2.6833689212799072, "logps/chosen": -263.3359375, "logps/rejected": -237.99044799804688, "loss": 0.6881, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8629517555236816, "rewards/margins": 0.06857781112194061, "rewards/rejected": -0.9315295219421387, "step": 761 }, { "epoch": 1.0, "learning_rate": 3.9566796083866756e-05, "logits/chosen": -2.4484739303588867, "logits/rejected": -2.3874497413635254, "logps/chosen": -143.9307403564453, "logps/rejected": -152.17724609375, "loss": 0.6827, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5658427476882935, "rewards/margins": 0.05139755457639694, "rewards/rejected": -0.617240309715271, "step": 762 }, { "epoch": 1.0, "learning_rate": 3.953766165325892e-05, "logits/chosen": -2.5935730934143066, "logits/rejected": -2.6049649715423584, "logps/chosen": -252.82337951660156, "logps/rejected": -228.11959838867188, "loss": 0.8136, "rewards/accuracies": 0.375, "rewards/chosen": -0.8735777735710144, "rewards/margins": -0.16885007917881012, "rewards/rejected": -0.7047276496887207, "step": 763 }, { "epoch": 1.0, "learning_rate": 3.9508497361047334e-05, "logits/chosen": -2.6745100021362305, "logits/rejected": -2.8012068271636963, "logps/chosen": -131.25433349609375, "logps/rejected": -153.92941284179688, "loss": 0.6063, "rewards/accuracies": 0.875, "rewards/chosen": -0.5639945268630981, "rewards/margins": 0.26005828380584717, "rewards/rejected": -0.8240528106689453, "step": 764 }, { "epoch": 1.0, "learning_rate": 3.9479303267137944e-05, "logits/chosen": -2.6887001991271973, "logits/rejected": -2.631093978881836, "logps/chosen": -167.7516632080078, "logps/rejected": -160.34854125976562, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": 0.12638841569423676, "rewards/margins": 1.77146315574646, "rewards/rejected": -1.6450748443603516, "step": 765 }, { "epoch": 1.0, "learning_rate": 3.9450079431497936e-05, "logits/chosen": -2.4736642837524414, "logits/rejected": -2.563966751098633, "logps/chosen": -179.4278564453125, "logps/rejected": -186.982421875, "loss": 0.2379, "rewards/accuracies": 0.9375, "rewards/chosen": 0.33454737067222595, "rewards/margins": 2.0607388019561768, "rewards/rejected": -1.7261914014816284, "step": 766 }, { "epoch": 1.0, "learning_rate": 3.9420825914155554e-05, "logits/chosen": -2.6249935626983643, "logits/rejected": -2.7404773235321045, "logps/chosen": -154.4555206298828, "logps/rejected": -208.5882568359375, "loss": 0.1634, "rewards/accuracies": 0.9375, "rewards/chosen": 0.31436601281166077, "rewards/margins": 2.54439115524292, "rewards/rejected": -2.230025053024292, "step": 767 }, { "epoch": 1.01, "learning_rate": 3.939154277520006e-05, "logits/chosen": -2.4601221084594727, "logits/rejected": -2.501352071762085, "logps/chosen": -144.6915740966797, "logps/rejected": -181.44070434570312, "loss": 0.2538, "rewards/accuracies": 0.875, "rewards/chosen": 0.06428545713424683, "rewards/margins": 1.5839626789093018, "rewards/rejected": -1.5196770429611206, "step": 768 }, { "epoch": 1.01, "learning_rate": 3.9362230074781506e-05, "logits/chosen": -2.6759233474731445, "logits/rejected": -2.677365779876709, "logps/chosen": -187.21469116210938, "logps/rejected": -176.88739013671875, "loss": 0.2348, "rewards/accuracies": 0.875, "rewards/chosen": 0.3633385896682739, "rewards/margins": 2.0741219520568848, "rewards/rejected": -1.7107833623886108, "step": 769 }, { "epoch": 1.01, "learning_rate": 3.9332887873110695e-05, "logits/chosen": -2.677248954772949, "logits/rejected": -2.6646389961242676, "logps/chosen": -165.93006896972656, "logps/rejected": -155.79644775390625, "loss": 0.2781, "rewards/accuracies": 0.875, "rewards/chosen": -0.005598783493041992, "rewards/margins": 1.6051024198532104, "rewards/rejected": -1.6107012033462524, "step": 770 }, { "epoch": 1.01, "learning_rate": 3.9303516230459035e-05, "logits/chosen": -2.54484224319458, "logits/rejected": -2.4810094833374023, "logps/chosen": -156.8101043701172, "logps/rejected": -180.32305908203125, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": 0.12862610816955566, "rewards/margins": 2.2837862968444824, "rewards/rejected": -2.155160427093506, "step": 771 }, { "epoch": 1.01, "learning_rate": 3.92741152071584e-05, "logits/chosen": -2.670797824859619, "logits/rejected": -2.716228723526001, "logps/chosen": -160.2844696044922, "logps/rejected": -186.7455291748047, "loss": 0.1597, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3678353428840637, "rewards/margins": 2.3705198764801025, "rewards/rejected": -2.0026843547821045, "step": 772 }, { "epoch": 1.01, "learning_rate": 3.924468486360101e-05, "logits/chosen": -2.617213249206543, "logits/rejected": -2.666292905807495, "logps/chosen": -165.1317901611328, "logps/rejected": -208.27572631835938, "loss": 0.1997, "rewards/accuracies": 1.0, "rewards/chosen": 0.14538979530334473, "rewards/margins": 1.8602721691131592, "rewards/rejected": -1.7148823738098145, "step": 773 }, { "epoch": 1.01, "learning_rate": 3.921522526023931e-05, "logits/chosen": -2.7032179832458496, "logits/rejected": -2.6985368728637695, "logps/chosen": -187.04617309570312, "logps/rejected": -231.27273559570312, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": 0.4273063540458679, "rewards/margins": 2.765761375427246, "rewards/rejected": -2.3384549617767334, "step": 774 }, { "epoch": 1.01, "learning_rate": 3.918573645758586e-05, "logits/chosen": -2.677338123321533, "logits/rejected": -2.735537528991699, "logps/chosen": -192.94326782226562, "logps/rejected": -228.2654571533203, "loss": 0.1494, "rewards/accuracies": 1.0, "rewards/chosen": 0.17543727159500122, "rewards/margins": 2.4333107471466064, "rewards/rejected": -2.25787353515625, "step": 775 }, { "epoch": 1.02, "learning_rate": 3.915621851621318e-05, "logits/chosen": -2.5823519229888916, "logits/rejected": -2.5766096115112305, "logps/chosen": -141.41311645507812, "logps/rejected": -165.74465942382812, "loss": 0.2209, "rewards/accuracies": 1.0, "rewards/chosen": 0.04889017716050148, "rewards/margins": 1.707082986831665, "rewards/rejected": -1.6581928730010986, "step": 776 }, { "epoch": 1.02, "learning_rate": 3.9126671496753666e-05, "logits/chosen": -2.4788622856140137, "logits/rejected": -2.6510164737701416, "logps/chosen": -140.3416748046875, "logps/rejected": -186.62484741210938, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": 0.08496019244194031, "rewards/margins": 1.8061598539352417, "rewards/rejected": -1.7211997509002686, "step": 777 }, { "epoch": 1.02, "learning_rate": 3.909709545989942e-05, "logits/chosen": -2.6408588886260986, "logits/rejected": -2.648200511932373, "logps/chosen": -174.47097778320312, "logps/rejected": -224.0052947998047, "loss": 0.1737, "rewards/accuracies": 0.9375, "rewards/chosen": 0.22252985835075378, "rewards/margins": 2.2816243171691895, "rewards/rejected": -2.0590946674346924, "step": 778 }, { "epoch": 1.02, "learning_rate": 3.9067490466402156e-05, "logits/chosen": -2.6448774337768555, "logits/rejected": -2.742879629135132, "logps/chosen": -174.5858612060547, "logps/rejected": -204.8096160888672, "loss": 0.1673, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08154253661632538, "rewards/margins": 2.480858325958252, "rewards/rejected": -2.562401056289673, "step": 779 }, { "epoch": 1.02, "learning_rate": 3.903785657707307e-05, "logits/chosen": -2.6244313716888428, "logits/rejected": -2.5507609844207764, "logps/chosen": -161.88987731933594, "logps/rejected": -171.99977111816406, "loss": 0.2874, "rewards/accuracies": 0.9375, "rewards/chosen": 0.13563959300518036, "rewards/margins": 2.074355363845825, "rewards/rejected": -1.9387155771255493, "step": 780 }, { "epoch": 1.02, "learning_rate": 3.9008193852782733e-05, "logits/chosen": -2.402883291244507, "logits/rejected": -2.421910285949707, "logps/chosen": -190.97164916992188, "logps/rejected": -180.68893432617188, "loss": 0.1755, "rewards/accuracies": 0.9375, "rewards/chosen": 0.36178380250930786, "rewards/margins": 2.5330705642700195, "rewards/rejected": -2.1712868213653564, "step": 781 }, { "epoch": 1.02, "learning_rate": 3.897850235446089e-05, "logits/chosen": -2.6583588123321533, "logits/rejected": -2.6155612468719482, "logps/chosen": -177.75830078125, "logps/rejected": -201.97372436523438, "loss": 0.1814, "rewards/accuracies": 0.9375, "rewards/chosen": 0.07586677372455597, "rewards/margins": 2.4885523319244385, "rewards/rejected": -2.4126858711242676, "step": 782 }, { "epoch": 1.02, "learning_rate": 3.894878214309645e-05, "logits/chosen": -2.4456002712249756, "logits/rejected": -2.5392351150512695, "logps/chosen": -147.53250122070312, "logps/rejected": -182.77304077148438, "loss": 0.1543, "rewards/accuracies": 1.0, "rewards/chosen": 0.09826147556304932, "rewards/margins": 2.4370062351226807, "rewards/rejected": -2.3387451171875, "step": 783 }, { "epoch": 1.03, "learning_rate": 3.8919033279737274e-05, "logits/chosen": -2.775362968444824, "logits/rejected": -2.770418405532837, "logps/chosen": -171.49510192871094, "logps/rejected": -208.9770965576172, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": 0.1164775863289833, "rewards/margins": 2.621551513671875, "rewards/rejected": -2.5050742626190186, "step": 784 }, { "epoch": 1.03, "learning_rate": 3.888925582549006e-05, "logits/chosen": -2.382932662963867, "logits/rejected": -2.3689355850219727, "logps/chosen": -214.31198120117188, "logps/rejected": -245.0536651611328, "loss": 0.2215, "rewards/accuracies": 0.875, "rewards/chosen": -0.08265908807516098, "rewards/margins": 2.2869906425476074, "rewards/rejected": -2.369649648666382, "step": 785 }, { "epoch": 1.03, "learning_rate": 3.885944984152027e-05, "logits/chosen": -2.601214647293091, "logits/rejected": -2.5713980197906494, "logps/chosen": -172.2421112060547, "logps/rejected": -205.98236083984375, "loss": 0.2156, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0705888569355011, "rewards/margins": 2.1648547649383545, "rewards/rejected": -2.235443353652954, "step": 786 }, { "epoch": 1.03, "learning_rate": 3.882961538905194e-05, "logits/chosen": -2.7650246620178223, "logits/rejected": -2.756899118423462, "logps/chosen": -159.0460205078125, "logps/rejected": -218.6039581298828, "loss": 0.1533, "rewards/accuracies": 1.0, "rewards/chosen": 0.06594078242778778, "rewards/margins": 2.9581427574157715, "rewards/rejected": -2.892202138900757, "step": 787 }, { "epoch": 1.03, "learning_rate": 3.879975252936761e-05, "logits/chosen": -2.8535146713256836, "logits/rejected": -2.751579999923706, "logps/chosen": -201.79710388183594, "logps/rejected": -212.3226776123047, "loss": 0.0961, "rewards/accuracies": 1.0, "rewards/chosen": 0.07383383810520172, "rewards/margins": 2.675102472305298, "rewards/rejected": -2.601268768310547, "step": 788 }, { "epoch": 1.03, "learning_rate": 3.876986132380814e-05, "logits/chosen": -2.7492191791534424, "logits/rejected": -2.7570226192474365, "logps/chosen": -163.69142150878906, "logps/rejected": -198.6139373779297, "loss": 0.1008, "rewards/accuracies": 1.0, "rewards/chosen": -0.25924035906791687, "rewards/margins": 2.795973539352417, "rewards/rejected": -3.055213451385498, "step": 789 }, { "epoch": 1.03, "learning_rate": 3.8739941833772643e-05, "logits/chosen": -2.767152786254883, "logits/rejected": -2.7644238471984863, "logps/chosen": -194.49136352539062, "logps/rejected": -214.67564392089844, "loss": 0.203, "rewards/accuracies": 0.875, "rewards/chosen": -0.2309228777885437, "rewards/margins": 2.918428421020508, "rewards/rejected": -3.149351119995117, "step": 790 }, { "epoch": 1.04, "learning_rate": 3.870999412071829e-05, "logits/chosen": -2.614018440246582, "logits/rejected": -2.600497245788574, "logps/chosen": -169.53805541992188, "logps/rejected": -169.95814514160156, "loss": 0.2289, "rewards/accuracies": 0.9375, "rewards/chosen": -0.38845112919807434, "rewards/margins": 1.9743072986602783, "rewards/rejected": -2.3627583980560303, "step": 791 }, { "epoch": 1.04, "learning_rate": 3.8680018246160295e-05, "logits/chosen": -2.7072720527648926, "logits/rejected": -2.7555181980133057, "logps/chosen": -170.2549591064453, "logps/rejected": -212.03839111328125, "loss": 0.135, "rewards/accuracies": 1.0, "rewards/chosen": -0.46249496936798096, "rewards/margins": 2.740030288696289, "rewards/rejected": -3.2025249004364014, "step": 792 }, { "epoch": 1.04, "learning_rate": 3.865001427167164e-05, "logits/chosen": -2.697120189666748, "logits/rejected": -2.571460247039795, "logps/chosen": -177.19332885742188, "logps/rejected": -204.76942443847656, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": -0.6775528788566589, "rewards/margins": 2.8083858489990234, "rewards/rejected": -3.485938787460327, "step": 793 }, { "epoch": 1.04, "learning_rate": 3.861998225888307e-05, "logits/chosen": -2.5678954124450684, "logits/rejected": -2.6190593242645264, "logps/chosen": -153.89779663085938, "logps/rejected": -167.26556396484375, "loss": 0.1821, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5305047035217285, "rewards/margins": 2.5454978942871094, "rewards/rejected": -3.076002597808838, "step": 794 }, { "epoch": 1.04, "learning_rate": 3.8589922269482924e-05, "logits/chosen": -2.6134603023529053, "logits/rejected": -2.6522388458251953, "logps/chosen": -229.1393585205078, "logps/rejected": -288.697998046875, "loss": 0.107, "rewards/accuracies": 1.0, "rewards/chosen": -0.4708881080150604, "rewards/margins": 3.699263095855713, "rewards/rejected": -4.1701507568359375, "step": 795 }, { "epoch": 1.04, "learning_rate": 3.855983436521699e-05, "logits/chosen": -2.560218334197998, "logits/rejected": -2.645477533340454, "logps/chosen": -147.77294921875, "logps/rejected": -212.90585327148438, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": -0.7539626955986023, "rewards/margins": 3.3151063919067383, "rewards/rejected": -4.069068908691406, "step": 796 }, { "epoch": 1.04, "learning_rate": 3.8529718607888394e-05, "logits/chosen": -2.4999420642852783, "logits/rejected": -2.5179193019866943, "logps/chosen": -152.94679260253906, "logps/rejected": -204.71592712402344, "loss": 0.2147, "rewards/accuracies": 1.0, "rewards/chosen": -1.2636586427688599, "rewards/margins": 2.3519301414489746, "rewards/rejected": -3.615588903427124, "step": 797 }, { "epoch": 1.04, "learning_rate": 3.8499575059357506e-05, "logits/chosen": -2.671869993209839, "logits/rejected": -2.640197515487671, "logps/chosen": -181.50674438476562, "logps/rejected": -215.46429443359375, "loss": 0.1806, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2009189128875732, "rewards/margins": 2.5449750423431396, "rewards/rejected": -3.745894193649292, "step": 798 }, { "epoch": 1.05, "learning_rate": 3.8469403781541745e-05, "logits/chosen": -2.6338608264923096, "logits/rejected": -2.760087728500366, "logps/chosen": -197.01071166992188, "logps/rejected": -244.93154907226562, "loss": 0.1091, "rewards/accuracies": 1.0, "rewards/chosen": -1.5004479885101318, "rewards/margins": 3.055344820022583, "rewards/rejected": -4.555792808532715, "step": 799 }, { "epoch": 1.05, "learning_rate": 3.843920483641551e-05, "logits/chosen": -2.66325044631958, "logits/rejected": -2.715461015701294, "logps/chosen": -186.20314025878906, "logps/rejected": -194.35206604003906, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": -1.1806678771972656, "rewards/margins": 2.5824830532073975, "rewards/rejected": -3.763150691986084, "step": 800 }, { "epoch": 1.05, "learning_rate": 3.840897828601002e-05, "logits/chosen": -2.532099485397339, "logits/rejected": -2.458197593688965, "logps/chosen": -220.65689086914062, "logps/rejected": -277.5340576171875, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -1.040308952331543, "rewards/margins": 3.4944987297058105, "rewards/rejected": -4.5348076820373535, "step": 801 }, { "epoch": 1.05, "learning_rate": 3.83787241924132e-05, "logits/chosen": -2.4101672172546387, "logits/rejected": -2.51715350151062, "logps/chosen": -151.6785888671875, "logps/rejected": -230.41976928710938, "loss": 0.1068, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8751517534255981, "rewards/margins": 4.202584743499756, "rewards/rejected": -5.077736854553223, "step": 802 }, { "epoch": 1.05, "learning_rate": 3.8348442617769564e-05, "logits/chosen": -2.50626277923584, "logits/rejected": -2.552149772644043, "logps/chosen": -178.29891967773438, "logps/rejected": -220.8915252685547, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": -1.427808165550232, "rewards/margins": 3.7678184509277344, "rewards/rejected": -5.195626258850098, "step": 803 }, { "epoch": 1.05, "learning_rate": 3.831813362428005e-05, "logits/chosen": -2.5403552055358887, "logits/rejected": -2.5728914737701416, "logps/chosen": -184.95680236816406, "logps/rejected": -228.92166137695312, "loss": 0.2942, "rewards/accuracies": 0.75, "rewards/chosen": -1.236476182937622, "rewards/margins": 2.8043324947357178, "rewards/rejected": -4.04080867767334, "step": 804 }, { "epoch": 1.05, "learning_rate": 3.8287797274201934e-05, "logits/chosen": -2.654428005218506, "logits/rejected": -2.609748363494873, "logps/chosen": -171.26904296875, "logps/rejected": -200.14511108398438, "loss": 0.1712, "rewards/accuracies": 1.0, "rewards/chosen": -1.2014542818069458, "rewards/margins": 2.651529312133789, "rewards/rejected": -3.8529834747314453, "step": 805 }, { "epoch": 1.05, "learning_rate": 3.825743362984868e-05, "logits/chosen": -2.585634231567383, "logits/rejected": -2.6210579872131348, "logps/chosen": -174.72225952148438, "logps/rejected": -256.7543640136719, "loss": 0.2016, "rewards/accuracies": 0.875, "rewards/chosen": -1.0417742729187012, "rewards/margins": 4.115874290466309, "rewards/rejected": -5.157649040222168, "step": 806 }, { "epoch": 1.06, "learning_rate": 3.8227042753589824e-05, "logits/chosen": -2.5954151153564453, "logits/rejected": -2.5807945728302, "logps/chosen": -200.4560546875, "logps/rejected": -255.4353485107422, "loss": 0.1359, "rewards/accuracies": 0.875, "rewards/chosen": -1.5734584331512451, "rewards/margins": 4.025257110595703, "rewards/rejected": -5.598715305328369, "step": 807 }, { "epoch": 1.06, "learning_rate": 3.819662470785082e-05, "logits/chosen": -2.7379062175750732, "logits/rejected": -2.7494699954986572, "logps/chosen": -176.42608642578125, "logps/rejected": -223.4091796875, "loss": 0.1528, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2805501222610474, "rewards/margins": 3.246807813644409, "rewards/rejected": -4.527358055114746, "step": 808 }, { "epoch": 1.06, "learning_rate": 3.816617955511296e-05, "logits/chosen": -2.7525413036346436, "logits/rejected": -2.777355432510376, "logps/chosen": -212.89166259765625, "logps/rejected": -217.78802490234375, "loss": 0.2782, "rewards/accuracies": 0.875, "rewards/chosen": -1.408876657485962, "rewards/margins": 2.0644912719726562, "rewards/rejected": -3.4733681678771973, "step": 809 }, { "epoch": 1.06, "learning_rate": 3.8135707357913176e-05, "logits/chosen": -2.537975311279297, "logits/rejected": -2.58882474899292, "logps/chosen": -161.26962280273438, "logps/rejected": -208.24331665039062, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": -0.7949110269546509, "rewards/margins": 3.6137166023254395, "rewards/rejected": -4.408627510070801, "step": 810 }, { "epoch": 1.06, "learning_rate": 3.8105208178843984e-05, "logits/chosen": -2.3414132595062256, "logits/rejected": -2.3927347660064697, "logps/chosen": -203.45179748535156, "logps/rejected": -210.25027465820312, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": -1.0561504364013672, "rewards/margins": 2.966663122177124, "rewards/rejected": -4.022813320159912, "step": 811 }, { "epoch": 1.06, "learning_rate": 3.8074682080553335e-05, "logits/chosen": -2.690915584564209, "logits/rejected": -2.6338562965393066, "logps/chosen": -207.17599487304688, "logps/rejected": -254.29653930664062, "loss": 0.1109, "rewards/accuracies": 0.9375, "rewards/chosen": -1.002183198928833, "rewards/margins": 3.457009792327881, "rewards/rejected": -4.459193229675293, "step": 812 }, { "epoch": 1.06, "learning_rate": 3.804412912574442e-05, "logits/chosen": -2.4453845024108887, "logits/rejected": -2.4959869384765625, "logps/chosen": -172.0583038330078, "logps/rejected": -218.82098388671875, "loss": 0.2408, "rewards/accuracies": 0.875, "rewards/chosen": -1.0620150566101074, "rewards/margins": 3.0320522785186768, "rewards/rejected": -4.094067096710205, "step": 813 }, { "epoch": 1.07, "learning_rate": 3.801354937717565e-05, "logits/chosen": -2.683800220489502, "logits/rejected": -2.5853464603424072, "logps/chosen": -190.3063507080078, "logps/rejected": -201.25628662109375, "loss": 0.208, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1379764080047607, "rewards/margins": 2.755608081817627, "rewards/rejected": -3.893584728240967, "step": 814 }, { "epoch": 1.07, "learning_rate": 3.798294289766043e-05, "logits/chosen": -2.4754679203033447, "logits/rejected": -2.4798026084899902, "logps/chosen": -183.4767303466797, "logps/rejected": -191.26222229003906, "loss": 0.2135, "rewards/accuracies": 0.875, "rewards/chosen": -1.0173275470733643, "rewards/margins": 2.415278673171997, "rewards/rejected": -3.4326062202453613, "step": 815 }, { "epoch": 1.07, "learning_rate": 3.795230975006712e-05, "logits/chosen": -2.612645387649536, "logits/rejected": -2.5963857173919678, "logps/chosen": -160.24755859375, "logps/rejected": -201.96185302734375, "loss": 0.185, "rewards/accuracies": 0.9375, "rewards/chosen": -1.287351131439209, "rewards/margins": 2.947019338607788, "rewards/rejected": -4.234370231628418, "step": 816 }, { "epoch": 1.07, "learning_rate": 3.792164999731881e-05, "logits/chosen": -2.6370742321014404, "logits/rejected": -2.70023512840271, "logps/chosen": -190.13369750976562, "logps/rejected": -205.581298828125, "loss": 0.2281, "rewards/accuracies": 0.875, "rewards/chosen": -1.7540441751480103, "rewards/margins": 3.2129039764404297, "rewards/rejected": -4.966948509216309, "step": 817 }, { "epoch": 1.07, "learning_rate": 3.789096370239328e-05, "logits/chosen": -2.6114511489868164, "logits/rejected": -2.739351272583008, "logps/chosen": -196.17510986328125, "logps/rejected": -247.4773712158203, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": -1.0242459774017334, "rewards/margins": 4.068726539611816, "rewards/rejected": -5.092972755432129, "step": 818 }, { "epoch": 1.07, "learning_rate": 3.786025092832279e-05, "logits/chosen": -2.3249611854553223, "logits/rejected": -2.407060384750366, "logps/chosen": -207.06622314453125, "logps/rejected": -227.32284545898438, "loss": 0.1706, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2159541845321655, "rewards/margins": 2.7725777626037598, "rewards/rejected": -3.988532066345215, "step": 819 }, { "epoch": 1.07, "learning_rate": 3.782951173819403e-05, "logits/chosen": -2.586280345916748, "logits/rejected": -2.5269079208374023, "logps/chosen": -226.07937622070312, "logps/rejected": -254.896240234375, "loss": 0.1904, "rewards/accuracies": 0.875, "rewards/chosen": -1.9202920198440552, "rewards/margins": 3.539307117462158, "rewards/rejected": -5.459599018096924, "step": 820 }, { "epoch": 1.07, "learning_rate": 3.7798746195147914e-05, "logits/chosen": -2.4165151119232178, "logits/rejected": -2.4409053325653076, "logps/chosen": -226.0311279296875, "logps/rejected": -254.019775390625, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -1.3306143283843994, "rewards/margins": 2.6145148277282715, "rewards/rejected": -3.945129156112671, "step": 821 }, { "epoch": 1.08, "learning_rate": 3.776795436237954e-05, "logits/chosen": -2.4291183948516846, "logits/rejected": -2.502776861190796, "logps/chosen": -158.3379669189453, "logps/rejected": -233.86962890625, "loss": 0.2155, "rewards/accuracies": 0.875, "rewards/chosen": -1.4095854759216309, "rewards/margins": 3.3275306224823, "rewards/rejected": -4.737115859985352, "step": 822 }, { "epoch": 1.08, "learning_rate": 3.773713630313793e-05, "logits/chosen": -2.695758819580078, "logits/rejected": -2.656249523162842, "logps/chosen": -202.1251220703125, "logps/rejected": -217.68896484375, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": -1.282354712486267, "rewards/margins": 3.205690860748291, "rewards/rejected": -4.488044738769531, "step": 823 }, { "epoch": 1.08, "learning_rate": 3.7706292080726055e-05, "logits/chosen": -2.5363056659698486, "logits/rejected": -2.5430288314819336, "logps/chosen": -205.69509887695312, "logps/rejected": -253.77920532226562, "loss": 0.2525, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4872784614562988, "rewards/margins": 3.4748756885528564, "rewards/rejected": -4.962153911590576, "step": 824 }, { "epoch": 1.08, "learning_rate": 3.767542175850058e-05, "logits/chosen": -2.420227527618408, "logits/rejected": -2.441901445388794, "logps/chosen": -181.02056884765625, "logps/rejected": -211.343505859375, "loss": 0.1606, "rewards/accuracies": 0.875, "rewards/chosen": -1.2797247171401978, "rewards/margins": 2.7236833572387695, "rewards/rejected": -4.003407955169678, "step": 825 }, { "epoch": 1.08, "learning_rate": 3.764452539987179e-05, "logits/chosen": -2.47520112991333, "logits/rejected": -2.458693027496338, "logps/chosen": -167.35470581054688, "logps/rejected": -193.6707305908203, "loss": 0.1663, "rewards/accuracies": 1.0, "rewards/chosen": -1.3829278945922852, "rewards/margins": 2.366436004638672, "rewards/rejected": -3.749363899230957, "step": 826 }, { "epoch": 1.08, "learning_rate": 3.761360306830345e-05, "logits/chosen": -2.5185067653656006, "logits/rejected": -2.50441575050354, "logps/chosen": -178.70388793945312, "logps/rejected": -218.41030883789062, "loss": 0.1615, "rewards/accuracies": 0.875, "rewards/chosen": -1.161661982536316, "rewards/margins": 2.8637630939483643, "rewards/rejected": -4.025424480438232, "step": 827 }, { "epoch": 1.08, "learning_rate": 3.75826548273127e-05, "logits/chosen": -2.633216142654419, "logits/rejected": -2.6944453716278076, "logps/chosen": -178.22970581054688, "logps/rejected": -216.95306396484375, "loss": 0.2062, "rewards/accuracies": 0.8125, "rewards/chosen": -1.33101487159729, "rewards/margins": 2.8503544330596924, "rewards/rejected": -4.181369304656982, "step": 828 }, { "epoch": 1.09, "learning_rate": 3.7551680740469874e-05, "logits/chosen": -2.604398727416992, "logits/rejected": -2.655909299850464, "logps/chosen": -233.8108367919922, "logps/rejected": -283.73272705078125, "loss": 0.133, "rewards/accuracies": 1.0, "rewards/chosen": -1.088188648223877, "rewards/margins": 3.245051622390747, "rewards/rejected": -4.333240032196045, "step": 829 }, { "epoch": 1.09, "learning_rate": 3.752068087139839e-05, "logits/chosen": -2.6542930603027344, "logits/rejected": -2.690643072128296, "logps/chosen": -187.74871826171875, "logps/rejected": -274.4953308105469, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": -1.0974299907684326, "rewards/margins": 4.077320575714111, "rewards/rejected": -5.174749851226807, "step": 830 }, { "epoch": 1.09, "learning_rate": 3.7489655283774657e-05, "logits/chosen": -2.5698750019073486, "logits/rejected": -2.680229902267456, "logps/chosen": -154.89736938476562, "logps/rejected": -205.21786499023438, "loss": 0.1433, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6827576756477356, "rewards/margins": 2.9387474060058594, "rewards/rejected": -3.6215052604675293, "step": 831 }, { "epoch": 1.09, "learning_rate": 3.7458604041327874e-05, "logits/chosen": -2.5519180297851562, "logits/rejected": -2.5656418800354004, "logps/chosen": -190.4122314453125, "logps/rejected": -213.00625610351562, "loss": 0.1569, "rewards/accuracies": 0.9375, "rewards/chosen": -1.285630226135254, "rewards/margins": 2.924367666244507, "rewards/rejected": -4.209997653961182, "step": 832 }, { "epoch": 1.09, "learning_rate": 3.742752720783997e-05, "logits/chosen": -2.514334201812744, "logits/rejected": -2.596501588821411, "logps/chosen": -192.1654052734375, "logps/rejected": -192.47653198242188, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": -0.9184039235115051, "rewards/margins": 2.5253825187683105, "rewards/rejected": -3.443786382675171, "step": 833 }, { "epoch": 1.09, "learning_rate": 3.7396424847145425e-05, "logits/chosen": -2.484549045562744, "logits/rejected": -2.6469879150390625, "logps/chosen": -148.88473510742188, "logps/rejected": -203.79083251953125, "loss": 0.146, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3805378675460815, "rewards/margins": 3.031029462814331, "rewards/rejected": -4.411567211151123, "step": 834 }, { "epoch": 1.09, "learning_rate": 3.736529702313114e-05, "logits/chosen": -2.613891363143921, "logits/rejected": -2.6539323329925537, "logps/chosen": -157.11306762695312, "logps/rejected": -214.69085693359375, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.9892842173576355, "rewards/margins": 2.9719948768615723, "rewards/rejected": -3.9612791538238525, "step": 835 }, { "epoch": 1.09, "learning_rate": 3.733414379973635e-05, "logits/chosen": -2.5769026279449463, "logits/rejected": -2.5845699310302734, "logps/chosen": -206.44366455078125, "logps/rejected": -273.4783630371094, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": -1.1630650758743286, "rewards/margins": 3.958601474761963, "rewards/rejected": -5.12166690826416, "step": 836 }, { "epoch": 1.1, "learning_rate": 3.730296524095245e-05, "logits/chosen": -2.5311949253082275, "logits/rejected": -2.602909564971924, "logps/chosen": -160.33348083496094, "logps/rejected": -215.58802795410156, "loss": 0.0861, "rewards/accuracies": 1.0, "rewards/chosen": -1.3376214504241943, "rewards/margins": 3.4596052169799805, "rewards/rejected": -4.797226428985596, "step": 837 }, { "epoch": 1.1, "learning_rate": 3.7271761410822856e-05, "logits/chosen": -2.503412961959839, "logits/rejected": -2.4912285804748535, "logps/chosen": -210.4758758544922, "logps/rejected": -248.14144897460938, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -1.2760006189346313, "rewards/margins": 3.6639609336853027, "rewards/rejected": -4.939960956573486, "step": 838 }, { "epoch": 1.1, "learning_rate": 3.724053237344294e-05, "logits/chosen": -2.5100913047790527, "logits/rejected": -2.6398231983184814, "logps/chosen": -195.51553344726562, "logps/rejected": -267.8132629394531, "loss": 0.1612, "rewards/accuracies": 0.9375, "rewards/chosen": -1.472798466682434, "rewards/margins": 3.3320348262786865, "rewards/rejected": -4.80483341217041, "step": 839 }, { "epoch": 1.1, "learning_rate": 3.720927819295979e-05, "logits/chosen": -2.625965118408203, "logits/rejected": -2.5181195735931396, "logps/chosen": -195.74607849121094, "logps/rejected": -225.23878479003906, "loss": 0.2915, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5482072830200195, "rewards/margins": 2.775569438934326, "rewards/rejected": -4.3237762451171875, "step": 840 }, { "epoch": 1.1, "learning_rate": 3.7177998933572186e-05, "logits/chosen": -2.4342215061187744, "logits/rejected": -2.4397995471954346, "logps/chosen": -174.26548767089844, "logps/rejected": -205.57427978515625, "loss": 0.1855, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7328426837921143, "rewards/margins": 2.65775990486145, "rewards/rejected": -4.3906025886535645, "step": 841 }, { "epoch": 1.1, "learning_rate": 3.7146694659530425e-05, "logits/chosen": -2.3403022289276123, "logits/rejected": -2.435096263885498, "logps/chosen": -164.419921875, "logps/rejected": -218.05117797851562, "loss": 0.2598, "rewards/accuracies": 0.875, "rewards/chosen": -2.064548969268799, "rewards/margins": 2.839548110961914, "rewards/rejected": -4.904096603393555, "step": 842 }, { "epoch": 1.1, "learning_rate": 3.711536543513614e-05, "logits/chosen": -2.5847721099853516, "logits/rejected": -2.605027198791504, "logps/chosen": -209.7950439453125, "logps/rejected": -246.93609619140625, "loss": 0.1416, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1541309356689453, "rewards/margins": 3.411691665649414, "rewards/rejected": -4.565822601318359, "step": 843 }, { "epoch": 1.1, "learning_rate": 3.708401132474228e-05, "logits/chosen": -2.5608434677124023, "logits/rejected": -2.5270862579345703, "logps/chosen": -178.58570861816406, "logps/rejected": -224.67947387695312, "loss": 0.0892, "rewards/accuracies": 0.9375, "rewards/chosen": -0.514496922492981, "rewards/margins": 4.215465068817139, "rewards/rejected": -4.72996187210083, "step": 844 }, { "epoch": 1.11, "learning_rate": 3.705263239275284e-05, "logits/chosen": -2.5800983905792236, "logits/rejected": -2.674860715866089, "logps/chosen": -186.04078674316406, "logps/rejected": -223.0049591064453, "loss": 0.1349, "rewards/accuracies": 0.9375, "rewards/chosen": -1.803220510482788, "rewards/margins": 3.094984531402588, "rewards/rejected": -4.898205280303955, "step": 845 }, { "epoch": 1.11, "learning_rate": 3.702122870362286e-05, "logits/chosen": -2.5478274822235107, "logits/rejected": -2.5513339042663574, "logps/chosen": -183.14073181152344, "logps/rejected": -196.20660400390625, "loss": 0.2509, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5068767070770264, "rewards/margins": 2.48431134223938, "rewards/rejected": -3.9911885261535645, "step": 846 }, { "epoch": 1.11, "learning_rate": 3.698980032185821e-05, "logits/chosen": -2.6585183143615723, "logits/rejected": -2.657508373260498, "logps/chosen": -188.41065979003906, "logps/rejected": -238.37930297851562, "loss": 0.2177, "rewards/accuracies": 0.875, "rewards/chosen": -1.8523781299591064, "rewards/margins": 3.5584514141082764, "rewards/rejected": -5.410830020904541, "step": 847 }, { "epoch": 1.11, "learning_rate": 3.695834731201548e-05, "logits/chosen": -2.541285991668701, "logits/rejected": -2.4983725547790527, "logps/chosen": -168.76828002929688, "logps/rejected": -194.1188507080078, "loss": 0.1259, "rewards/accuracies": 1.0, "rewards/chosen": -1.5406126976013184, "rewards/margins": 3.1631386280059814, "rewards/rejected": -4.703751564025879, "step": 848 }, { "epoch": 1.11, "learning_rate": 3.692686973870184e-05, "logits/chosen": -2.571265935897827, "logits/rejected": -2.594231367111206, "logps/chosen": -198.38992309570312, "logps/rejected": -218.17489624023438, "loss": 0.1462, "rewards/accuracies": 1.0, "rewards/chosen": -1.2703204154968262, "rewards/margins": 2.8772685527801514, "rewards/rejected": -4.147588729858398, "step": 849 }, { "epoch": 1.11, "learning_rate": 3.689536766657494e-05, "logits/chosen": -2.5608391761779785, "logits/rejected": -2.6710116863250732, "logps/chosen": -181.3638153076172, "logps/rejected": -238.46327209472656, "loss": 0.1191, "rewards/accuracies": 1.0, "rewards/chosen": -1.4569287300109863, "rewards/margins": 3.5289573669433594, "rewards/rejected": -4.9858856201171875, "step": 850 }, { "epoch": 1.11, "learning_rate": 3.6863841160342723e-05, "logits/chosen": -2.4190502166748047, "logits/rejected": -2.4080772399902344, "logps/chosen": -168.54371643066406, "logps/rejected": -239.17726135253906, "loss": 0.1511, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5876612663269043, "rewards/margins": 3.6042914390563965, "rewards/rejected": -5.191952228546143, "step": 851 }, { "epoch": 1.12, "learning_rate": 3.683229028476334e-05, "logits/chosen": -2.7769880294799805, "logits/rejected": -2.769570827484131, "logps/chosen": -244.13287353515625, "logps/rejected": -266.6891174316406, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -1.8334072828292847, "rewards/margins": 4.523943901062012, "rewards/rejected": -6.357351303100586, "step": 852 }, { "epoch": 1.12, "learning_rate": 3.6800715104645e-05, "logits/chosen": -2.6356747150421143, "logits/rejected": -2.676835298538208, "logps/chosen": -202.72198486328125, "logps/rejected": -218.10472106933594, "loss": 0.1855, "rewards/accuracies": 0.875, "rewards/chosen": -1.5705070495605469, "rewards/margins": 2.939119338989258, "rewards/rejected": -4.509626388549805, "step": 853 }, { "epoch": 1.12, "learning_rate": 3.676911568484583e-05, "logits/chosen": -2.4276986122131348, "logits/rejected": -2.474168062210083, "logps/chosen": -172.96237182617188, "logps/rejected": -211.53924560546875, "loss": 0.0993, "rewards/accuracies": 0.9375, "rewards/chosen": -1.662520170211792, "rewards/margins": 2.878678798675537, "rewards/rejected": -4.541199207305908, "step": 854 }, { "epoch": 1.12, "learning_rate": 3.673749209027375e-05, "logits/chosen": -2.4743990898132324, "logits/rejected": -2.4546518325805664, "logps/chosen": -180.78338623046875, "logps/rejected": -203.42556762695312, "loss": 0.1602, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8969523906707764, "rewards/margins": 2.973909378051758, "rewards/rejected": -3.8708620071411133, "step": 855 }, { "epoch": 1.12, "learning_rate": 3.6705844385886334e-05, "logits/chosen": -2.610804796218872, "logits/rejected": -2.624135732650757, "logps/chosen": -185.5303497314453, "logps/rejected": -201.92044067382812, "loss": 0.3314, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7442423105239868, "rewards/margins": 2.786667823791504, "rewards/rejected": -4.530910491943359, "step": 856 }, { "epoch": 1.12, "learning_rate": 3.667417263669068e-05, "logits/chosen": -2.4083616733551025, "logits/rejected": -2.6500542163848877, "logps/chosen": -159.14784240722656, "logps/rejected": -201.2216033935547, "loss": 0.0765, "rewards/accuracies": 1.0, "rewards/chosen": -1.6013761758804321, "rewards/margins": 3.6397151947021484, "rewards/rejected": -5.241092205047607, "step": 857 }, { "epoch": 1.12, "learning_rate": 3.6642476907743276e-05, "logits/chosen": -2.4894661903381348, "logits/rejected": -2.662278652191162, "logps/chosen": -231.25991821289062, "logps/rejected": -341.57867431640625, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": -1.5516610145568848, "rewards/margins": 4.967604160308838, "rewards/rejected": -6.519265174865723, "step": 858 }, { "epoch": 1.12, "learning_rate": 3.661075726414986e-05, "logits/chosen": -2.5153987407684326, "logits/rejected": -2.6378214359283447, "logps/chosen": -197.08767700195312, "logps/rejected": -257.2450256347656, "loss": 0.0809, "rewards/accuracies": 1.0, "rewards/chosen": -1.84122633934021, "rewards/margins": 3.511990785598755, "rewards/rejected": -5.353217124938965, "step": 859 }, { "epoch": 1.13, "learning_rate": 3.6579013771065305e-05, "logits/chosen": -2.3079957962036133, "logits/rejected": -2.3357715606689453, "logps/chosen": -141.00547790527344, "logps/rejected": -158.54039001464844, "loss": 0.2199, "rewards/accuracies": 0.9375, "rewards/chosen": -1.963112711906433, "rewards/margins": 2.2221224308013916, "rewards/rejected": -4.185235500335693, "step": 860 }, { "epoch": 1.13, "learning_rate": 3.654724649369348e-05, "logits/chosen": -2.587371587753296, "logits/rejected": -2.6012768745422363, "logps/chosen": -312.8792419433594, "logps/rejected": -355.51837158203125, "loss": 0.2491, "rewards/accuracies": 0.75, "rewards/chosen": -1.8825641870498657, "rewards/margins": 4.342455863952637, "rewards/rejected": -6.225020408630371, "step": 861 }, { "epoch": 1.13, "learning_rate": 3.651545549728709e-05, "logits/chosen": -2.416578769683838, "logits/rejected": -2.4245691299438477, "logps/chosen": -169.2411346435547, "logps/rejected": -190.55685424804688, "loss": 0.2026, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7248740196228027, "rewards/margins": 2.488800048828125, "rewards/rejected": -4.2136735916137695, "step": 862 }, { "epoch": 1.13, "learning_rate": 3.6483640847147554e-05, "logits/chosen": -2.4201467037200928, "logits/rejected": -2.4285213947296143, "logps/chosen": -189.98886108398438, "logps/rejected": -231.26080322265625, "loss": 0.1565, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8520042896270752, "rewards/margins": 2.9693615436553955, "rewards/rejected": -4.8213653564453125, "step": 863 }, { "epoch": 1.13, "learning_rate": 3.645180260862492e-05, "logits/chosen": -2.3902981281280518, "logits/rejected": -2.4115846157073975, "logps/chosen": -172.95211791992188, "logps/rejected": -297.0254211425781, "loss": 0.1506, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8114925622940063, "rewards/margins": 3.8624112606048584, "rewards/rejected": -5.673903465270996, "step": 864 }, { "epoch": 1.13, "learning_rate": 3.6419940847117626e-05, "logits/chosen": -2.375730514526367, "logits/rejected": -2.5130038261413574, "logps/chosen": -143.1685028076172, "logps/rejected": -220.02430725097656, "loss": 0.1561, "rewards/accuracies": 0.875, "rewards/chosen": -1.8501218557357788, "rewards/margins": 3.1089580059051514, "rewards/rejected": -4.959079742431641, "step": 865 }, { "epoch": 1.13, "learning_rate": 3.638805562807249e-05, "logits/chosen": -2.474574565887451, "logits/rejected": -2.568755865097046, "logps/chosen": -202.5961151123047, "logps/rejected": -257.1708984375, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -1.8871090412139893, "rewards/margins": 4.0226874351501465, "rewards/rejected": -5.909796714782715, "step": 866 }, { "epoch": 1.13, "learning_rate": 3.635614701698448e-05, "logits/chosen": -2.459073543548584, "logits/rejected": -2.4574649333953857, "logps/chosen": -206.9467315673828, "logps/rejected": -242.87551879882812, "loss": 0.1437, "rewards/accuracies": 0.9375, "rewards/chosen": -2.290987491607666, "rewards/margins": 3.7120237350463867, "rewards/rejected": -6.003011226654053, "step": 867 }, { "epoch": 1.14, "learning_rate": 3.632421507939661e-05, "logits/chosen": -2.6140787601470947, "logits/rejected": -2.612224578857422, "logps/chosen": -223.91006469726562, "logps/rejected": -291.6077880859375, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -2.003418445587158, "rewards/margins": 4.071234703063965, "rewards/rejected": -6.074653625488281, "step": 868 }, { "epoch": 1.14, "learning_rate": 3.629225988089983e-05, "logits/chosen": -2.4317378997802734, "logits/rejected": -2.6184799671173096, "logps/chosen": -180.1226348876953, "logps/rejected": -190.80392456054688, "loss": 0.1817, "rewards/accuracies": 1.0, "rewards/chosen": -1.4009151458740234, "rewards/margins": 2.961726427078247, "rewards/rejected": -4.362641334533691, "step": 869 }, { "epoch": 1.14, "learning_rate": 3.6260281487132846e-05, "logits/chosen": -2.4276628494262695, "logits/rejected": -2.3554205894470215, "logps/chosen": -184.47543334960938, "logps/rejected": -217.70945739746094, "loss": 0.2233, "rewards/accuracies": 0.875, "rewards/chosen": -1.7305198907852173, "rewards/margins": 3.172408103942871, "rewards/rejected": -4.902928352355957, "step": 870 }, { "epoch": 1.14, "learning_rate": 3.622827996378203e-05, "logits/chosen": -2.5787577629089355, "logits/rejected": -2.5663416385650635, "logps/chosen": -188.5000762939453, "logps/rejected": -188.4753875732422, "loss": 0.2161, "rewards/accuracies": 1.0, "rewards/chosen": -1.2453864812850952, "rewards/margins": 3.2564258575439453, "rewards/rejected": -4.50181245803833, "step": 871 }, { "epoch": 1.14, "learning_rate": 3.6196255376581254e-05, "logits/chosen": -2.5789971351623535, "logits/rejected": -2.5212442874908447, "logps/chosen": -217.9554901123047, "logps/rejected": -214.15769958496094, "loss": 0.3423, "rewards/accuracies": 0.75, "rewards/chosen": -1.4449862241744995, "rewards/margins": 2.885317325592041, "rewards/rejected": -4.330303192138672, "step": 872 }, { "epoch": 1.14, "learning_rate": 3.616420779131177e-05, "logits/chosen": -2.3495595455169678, "logits/rejected": -2.4180235862731934, "logps/chosen": -194.8360595703125, "logps/rejected": -278.6788024902344, "loss": 0.1269, "rewards/accuracies": 0.9375, "rewards/chosen": -1.452872633934021, "rewards/margins": 4.4202799797058105, "rewards/rejected": -5.873153209686279, "step": 873 }, { "epoch": 1.14, "learning_rate": 3.613213727380206e-05, "logits/chosen": -2.5643696784973145, "logits/rejected": -2.60063099861145, "logps/chosen": -172.3582000732422, "logps/rejected": -227.60833740234375, "loss": 0.2933, "rewards/accuracies": 0.875, "rewards/chosen": -1.6060482263565063, "rewards/margins": 3.702348232269287, "rewards/rejected": -5.308396339416504, "step": 874 }, { "epoch": 1.15, "learning_rate": 3.610004388992771e-05, "logits/chosen": -2.4454939365386963, "logits/rejected": -2.536020517349243, "logps/chosen": -174.5612335205078, "logps/rejected": -223.3062744140625, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": -1.4121896028518677, "rewards/margins": 3.2348647117614746, "rewards/rejected": -4.647054195404053, "step": 875 }, { "epoch": 1.15, "learning_rate": 3.6067927705611304e-05, "logits/chosen": -2.4754014015197754, "logits/rejected": -2.569913387298584, "logps/chosen": -169.22718811035156, "logps/rejected": -212.828857421875, "loss": 0.176, "rewards/accuracies": 0.875, "rewards/chosen": -1.641953945159912, "rewards/margins": 3.349804401397705, "rewards/rejected": -4.991758346557617, "step": 876 }, { "epoch": 1.15, "learning_rate": 3.6035788786822225e-05, "logits/chosen": -2.3765363693237305, "logits/rejected": -2.436837911605835, "logps/chosen": -183.66009521484375, "logps/rejected": -224.55966186523438, "loss": 0.151, "rewards/accuracies": 1.0, "rewards/chosen": -1.2727960348129272, "rewards/margins": 2.735487461090088, "rewards/rejected": -4.008283615112305, "step": 877 }, { "epoch": 1.15, "learning_rate": 3.6003627199576564e-05, "logits/chosen": -2.4541356563568115, "logits/rejected": -2.5396711826324463, "logps/chosen": -181.16824340820312, "logps/rejected": -210.95333862304688, "loss": 0.145, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5306426286697388, "rewards/margins": 3.0862133502960205, "rewards/rejected": -4.616855621337891, "step": 878 }, { "epoch": 1.15, "learning_rate": 3.597144300993699e-05, "logits/chosen": -2.450474977493286, "logits/rejected": -2.512716293334961, "logps/chosen": -183.2237548828125, "logps/rejected": -270.755126953125, "loss": 0.1186, "rewards/accuracies": 1.0, "rewards/chosen": -1.4356952905654907, "rewards/margins": 3.521200656890869, "rewards/rejected": -4.95689582824707, "step": 879 }, { "epoch": 1.15, "learning_rate": 3.593923628401259e-05, "logits/chosen": -2.49372935295105, "logits/rejected": -2.5204477310180664, "logps/chosen": -191.3932342529297, "logps/rejected": -233.7782440185547, "loss": 0.11, "rewards/accuracies": 1.0, "rewards/chosen": -1.4455353021621704, "rewards/margins": 2.7963688373565674, "rewards/rejected": -4.241904258728027, "step": 880 }, { "epoch": 1.15, "learning_rate": 3.5907007087958726e-05, "logits/chosen": -2.5309414863586426, "logits/rejected": -2.7542734146118164, "logps/chosen": -181.82196044921875, "logps/rejected": -285.6480407714844, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -1.5503937005996704, "rewards/margins": 3.967815399169922, "rewards/rejected": -5.518209457397461, "step": 881 }, { "epoch": 1.15, "learning_rate": 3.587475548797694e-05, "logits/chosen": -2.563689708709717, "logits/rejected": -2.4793877601623535, "logps/chosen": -157.08343505859375, "logps/rejected": -204.4723358154297, "loss": 0.23, "rewards/accuracies": 0.875, "rewards/chosen": -1.6042224168777466, "rewards/margins": 2.580112934112549, "rewards/rejected": -4.184335231781006, "step": 882 }, { "epoch": 1.16, "learning_rate": 3.5842481550314794e-05, "logits/chosen": -2.490567445755005, "logits/rejected": -2.5565853118896484, "logps/chosen": -176.2081298828125, "logps/rejected": -216.16978454589844, "loss": 0.0932, "rewards/accuracies": 0.9375, "rewards/chosen": -1.209083080291748, "rewards/margins": 3.1953208446502686, "rewards/rejected": -4.4044036865234375, "step": 883 }, { "epoch": 1.16, "learning_rate": 3.581018534126571e-05, "logits/chosen": -2.52408504486084, "logits/rejected": -2.550704002380371, "logps/chosen": -211.55084228515625, "logps/rejected": -234.77528381347656, "loss": 0.1392, "rewards/accuracies": 0.875, "rewards/chosen": -2.2925100326538086, "rewards/margins": 3.1794278621673584, "rewards/rejected": -5.471938133239746, "step": 884 }, { "epoch": 1.16, "learning_rate": 3.577786692716886e-05, "logits/chosen": -2.5428433418273926, "logits/rejected": -2.4777708053588867, "logps/chosen": -165.32713317871094, "logps/rejected": -190.3759765625, "loss": 0.3405, "rewards/accuracies": 0.6875, "rewards/chosen": -2.022003650665283, "rewards/margins": 2.210864543914795, "rewards/rejected": -4.232868194580078, "step": 885 }, { "epoch": 1.16, "learning_rate": 3.574552637440907e-05, "logits/chosen": -2.411600351333618, "logits/rejected": -2.4321343898773193, "logps/chosen": -173.0433807373047, "logps/rejected": -220.9730224609375, "loss": 0.1121, "rewards/accuracies": 1.0, "rewards/chosen": -1.7523713111877441, "rewards/margins": 3.258190631866455, "rewards/rejected": -5.010562419891357, "step": 886 }, { "epoch": 1.16, "learning_rate": 3.571316374941658e-05, "logits/chosen": -2.3936336040496826, "logits/rejected": -2.480240821838379, "logps/chosen": -166.53944396972656, "logps/rejected": -230.98146057128906, "loss": 0.1113, "rewards/accuracies": 0.9375, "rewards/chosen": -0.883888304233551, "rewards/margins": 3.743154287338257, "rewards/rejected": -4.627042770385742, "step": 887 }, { "epoch": 1.16, "learning_rate": 3.568077911866703e-05, "logits/chosen": -2.4695091247558594, "logits/rejected": -2.465665340423584, "logps/chosen": -182.034912109375, "logps/rejected": -231.8602294921875, "loss": 0.1621, "rewards/accuracies": 0.875, "rewards/chosen": -1.706071138381958, "rewards/margins": 4.134305000305176, "rewards/rejected": -5.840375900268555, "step": 888 }, { "epoch": 1.16, "learning_rate": 3.564837254868118e-05, "logits/chosen": -2.471785068511963, "logits/rejected": -2.4700446128845215, "logps/chosen": -168.14251708984375, "logps/rejected": -205.8127899169922, "loss": 0.1355, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1204754114151, "rewards/margins": 2.8358914852142334, "rewards/rejected": -3.956366539001465, "step": 889 }, { "epoch": 1.16, "learning_rate": 3.561594410602495e-05, "logits/chosen": -2.5301220417022705, "logits/rejected": -2.5192551612854004, "logps/chosen": -181.7959442138672, "logps/rejected": -193.1697235107422, "loss": 0.3494, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5961499214172363, "rewards/margins": 2.624248504638672, "rewards/rejected": -4.220398426055908, "step": 890 }, { "epoch": 1.17, "learning_rate": 3.558349385730913e-05, "logits/chosen": -2.67132306098938, "logits/rejected": -2.6529135704040527, "logps/chosen": -168.33639526367188, "logps/rejected": -207.62216186523438, "loss": 0.1778, "rewards/accuracies": 0.875, "rewards/chosen": -1.1539896726608276, "rewards/margins": 3.851142644882202, "rewards/rejected": -5.00513219833374, "step": 891 }, { "epoch": 1.17, "learning_rate": 3.5551021869189286e-05, "logits/chosen": -2.357180595397949, "logits/rejected": -2.373013734817505, "logps/chosen": -189.8119659423828, "logps/rejected": -233.48062133789062, "loss": 0.255, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8167555332183838, "rewards/margins": 2.735703945159912, "rewards/rejected": -4.552459716796875, "step": 892 }, { "epoch": 1.17, "learning_rate": 3.55185282083657e-05, "logits/chosen": -2.5426039695739746, "logits/rejected": -2.6288843154907227, "logps/chosen": -191.44874572753906, "logps/rejected": -240.08837890625, "loss": 0.1437, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5649197101593018, "rewards/margins": 3.80615234375, "rewards/rejected": -5.371071815490723, "step": 893 }, { "epoch": 1.17, "learning_rate": 3.548601294158313e-05, "logits/chosen": -2.582663059234619, "logits/rejected": -2.585432291030884, "logps/chosen": -197.2909393310547, "logps/rejected": -229.80625915527344, "loss": 0.154, "rewards/accuracies": 0.875, "rewards/chosen": -1.6019847393035889, "rewards/margins": 3.1420061588287354, "rewards/rejected": -4.743990421295166, "step": 894 }, { "epoch": 1.17, "learning_rate": 3.5453476135630706e-05, "logits/chosen": -2.5137104988098145, "logits/rejected": -2.6177213191986084, "logps/chosen": -198.3410186767578, "logps/rejected": -256.9710693359375, "loss": 0.1517, "rewards/accuracies": 0.875, "rewards/chosen": -1.4836442470550537, "rewards/margins": 4.688479423522949, "rewards/rejected": -6.172123908996582, "step": 895 }, { "epoch": 1.17, "learning_rate": 3.542091785734184e-05, "logits/chosen": -2.538341760635376, "logits/rejected": -2.463745355606079, "logps/chosen": -190.23663330078125, "logps/rejected": -183.18646240234375, "loss": 0.1924, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9041906595230103, "rewards/margins": 2.5563762187957764, "rewards/rejected": -4.460566520690918, "step": 896 }, { "epoch": 1.17, "learning_rate": 3.538833817359401e-05, "logits/chosen": -2.5077438354492188, "logits/rejected": -2.4932098388671875, "logps/chosen": -174.5640106201172, "logps/rejected": -215.76971435546875, "loss": 0.1574, "rewards/accuracies": 0.875, "rewards/chosen": -1.460890531539917, "rewards/margins": 3.877847194671631, "rewards/rejected": -5.338737487792969, "step": 897 }, { "epoch": 1.18, "learning_rate": 3.5355737151308686e-05, "logits/chosen": -2.5635290145874023, "logits/rejected": -2.613480567932129, "logps/chosen": -174.78395080566406, "logps/rejected": -211.57774353027344, "loss": 0.1425, "rewards/accuracies": 1.0, "rewards/chosen": -1.5586146116256714, "rewards/margins": 3.321430206298828, "rewards/rejected": -4.880044937133789, "step": 898 }, { "epoch": 1.18, "learning_rate": 3.5323114857451174e-05, "logits/chosen": -2.665316581726074, "logits/rejected": -2.619015693664551, "logps/chosen": -179.59425354003906, "logps/rejected": -238.99795532226562, "loss": 0.18, "rewards/accuracies": 0.875, "rewards/chosen": -1.855625867843628, "rewards/margins": 3.34808349609375, "rewards/rejected": -5.203709602355957, "step": 899 }, { "epoch": 1.18, "learning_rate": 3.529047135903045e-05, "logits/chosen": -2.5373458862304688, "logits/rejected": -2.524705410003662, "logps/chosen": -235.49473571777344, "logps/rejected": -215.7819061279297, "loss": 0.1887, "rewards/accuracies": 0.875, "rewards/chosen": -1.3308095932006836, "rewards/margins": 3.505575180053711, "rewards/rejected": -4.8363847732543945, "step": 900 }, { "epoch": 1.18, "learning_rate": 3.525780672309907e-05, "logits/chosen": -2.6096298694610596, "logits/rejected": -2.6740314960479736, "logps/chosen": -153.22409057617188, "logps/rejected": -240.62652587890625, "loss": 0.2415, "rewards/accuracies": 0.8125, "rewards/chosen": -1.565657138824463, "rewards/margins": 3.297717332839966, "rewards/rejected": -4.86337423324585, "step": 901 }, { "epoch": 1.18, "learning_rate": 3.522512101675299e-05, "logits/chosen": -2.3234548568725586, "logits/rejected": -2.458498954772949, "logps/chosen": -162.8458251953125, "logps/rejected": -195.20492553710938, "loss": 0.1239, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0964446067810059, "rewards/margins": 3.5404043197631836, "rewards/rejected": -4.6368489265441895, "step": 902 }, { "epoch": 1.18, "learning_rate": 3.519241430713145e-05, "logits/chosen": -2.495265007019043, "logits/rejected": -2.479097604751587, "logps/chosen": -236.592529296875, "logps/rejected": -252.68394470214844, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -0.9273030161857605, "rewards/margins": 3.584512948989868, "rewards/rejected": -4.511816501617432, "step": 903 }, { "epoch": 1.18, "learning_rate": 3.5159686661416834e-05, "logits/chosen": -2.364778518676758, "logits/rejected": -2.4186415672302246, "logps/chosen": -172.12220764160156, "logps/rejected": -220.8270263671875, "loss": 0.1384, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0367463827133179, "rewards/margins": 2.731386423110962, "rewards/rejected": -3.7681326866149902, "step": 904 }, { "epoch": 1.18, "learning_rate": 3.512693814683456e-05, "logits/chosen": -2.630051612854004, "logits/rejected": -2.6269326210021973, "logps/chosen": -176.4068145751953, "logps/rejected": -267.4722900390625, "loss": 0.1077, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4196218252182007, "rewards/margins": 3.308180809020996, "rewards/rejected": -4.727802753448486, "step": 905 }, { "epoch": 1.19, "learning_rate": 3.5094168830652854e-05, "logits/chosen": -2.50472354888916, "logits/rejected": -2.5423359870910645, "logps/chosen": -184.10879516601562, "logps/rejected": -237.14556884765625, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -1.1987833976745605, "rewards/margins": 4.0037665367126465, "rewards/rejected": -5.202549934387207, "step": 906 }, { "epoch": 1.19, "learning_rate": 3.506137878018272e-05, "logits/chosen": -2.5517704486846924, "logits/rejected": -2.548060655593872, "logps/chosen": -199.19915771484375, "logps/rejected": -234.20936584472656, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -1.356502890586853, "rewards/margins": 3.5127556324005127, "rewards/rejected": -4.869258403778076, "step": 907 }, { "epoch": 1.19, "learning_rate": 3.502856806277773e-05, "logits/chosen": -2.4246985912323, "logits/rejected": -2.3955211639404297, "logps/chosen": -199.87081909179688, "logps/rejected": -204.13876342773438, "loss": 0.1924, "rewards/accuracies": 0.875, "rewards/chosen": -1.4611130952835083, "rewards/margins": 3.7410144805908203, "rewards/rejected": -5.202127933502197, "step": 908 }, { "epoch": 1.19, "learning_rate": 3.4995736745833895e-05, "logits/chosen": -2.5710816383361816, "logits/rejected": -2.6107711791992188, "logps/chosen": -157.6302490234375, "logps/rejected": -233.49349975585938, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": -0.9010351300239563, "rewards/margins": 3.532578706741333, "rewards/rejected": -4.4336137771606445, "step": 909 }, { "epoch": 1.19, "learning_rate": 3.496288489678958e-05, "logits/chosen": -2.52079701423645, "logits/rejected": -2.495419502258301, "logps/chosen": -176.95236206054688, "logps/rejected": -222.7263641357422, "loss": 0.1798, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7388372421264648, "rewards/margins": 3.1190779209136963, "rewards/rejected": -4.857914924621582, "step": 910 }, { "epoch": 1.19, "learning_rate": 3.493001258312529e-05, "logits/chosen": -2.7223851680755615, "logits/rejected": -2.791260242462158, "logps/chosen": -206.46063232421875, "logps/rejected": -213.7384796142578, "loss": 0.089, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8603122234344482, "rewards/margins": 3.696009635925293, "rewards/rejected": -4.556321144104004, "step": 911 }, { "epoch": 1.19, "learning_rate": 3.489711987236357e-05, "logits/chosen": -2.5706539154052734, "logits/rejected": -2.581258773803711, "logps/chosen": -203.28652954101562, "logps/rejected": -235.77676391601562, "loss": 0.2027, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5995769500732422, "rewards/margins": 3.2174808979034424, "rewards/rejected": -4.817058086395264, "step": 912 }, { "epoch": 1.2, "learning_rate": 3.4864206832068884e-05, "logits/chosen": -2.6766703128814697, "logits/rejected": -2.6705029010772705, "logps/chosen": -180.23080444335938, "logps/rejected": -235.3216094970703, "loss": 0.1355, "rewards/accuracies": 0.9375, "rewards/chosen": -1.026005506515503, "rewards/margins": 3.379913806915283, "rewards/rejected": -4.405919075012207, "step": 913 }, { "epoch": 1.2, "learning_rate": 3.483127352984742e-05, "logits/chosen": -2.3571276664733887, "logits/rejected": -2.5192885398864746, "logps/chosen": -162.15489196777344, "logps/rejected": -215.78590393066406, "loss": 0.1989, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5191320180892944, "rewards/margins": 3.3156440258026123, "rewards/rejected": -4.834775924682617, "step": 914 }, { "epoch": 1.2, "learning_rate": 3.479832003334702e-05, "logits/chosen": -2.554062604904175, "logits/rejected": -2.588319778442383, "logps/chosen": -220.20921325683594, "logps/rejected": -267.699951171875, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": -1.2863855361938477, "rewards/margins": 3.6802079677581787, "rewards/rejected": -4.9665937423706055, "step": 915 }, { "epoch": 1.2, "learning_rate": 3.476534641025698e-05, "logits/chosen": -2.545753002166748, "logits/rejected": -2.56862735748291, "logps/chosen": -192.8128662109375, "logps/rejected": -249.750732421875, "loss": 0.1533, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2434145212173462, "rewards/margins": 3.0867209434509277, "rewards/rejected": -4.330134868621826, "step": 916 }, { "epoch": 1.2, "learning_rate": 3.4732352728307966e-05, "logits/chosen": -2.8638410568237305, "logits/rejected": -2.8774917125701904, "logps/chosen": -217.84780883789062, "logps/rejected": -254.37258911132812, "loss": 0.101, "rewards/accuracies": 0.9375, "rewards/chosen": -1.095003366470337, "rewards/margins": 4.037415981292725, "rewards/rejected": -5.132419586181641, "step": 917 }, { "epoch": 1.2, "learning_rate": 3.469933905527182e-05, "logits/chosen": -2.551844835281372, "logits/rejected": -2.530813217163086, "logps/chosen": -185.98265075683594, "logps/rejected": -205.4719696044922, "loss": 0.283, "rewards/accuracies": 0.875, "rewards/chosen": -1.1485133171081543, "rewards/margins": 2.621171474456787, "rewards/rejected": -3.7696852684020996, "step": 918 }, { "epoch": 1.2, "learning_rate": 3.466630545896146e-05, "logits/chosen": -2.413818359375, "logits/rejected": -2.587848663330078, "logps/chosen": -165.9118194580078, "logps/rejected": -241.44552612304688, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": -1.4189995527267456, "rewards/margins": 3.7784295082092285, "rewards/rejected": -5.197429180145264, "step": 919 }, { "epoch": 1.2, "learning_rate": 3.463325200723071e-05, "logits/chosen": -2.4706473350524902, "logits/rejected": -2.5091466903686523, "logps/chosen": -201.78915405273438, "logps/rejected": -254.05223083496094, "loss": 0.102, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2410138845443726, "rewards/margins": 3.5578086376190186, "rewards/rejected": -4.798822402954102, "step": 920 }, { "epoch": 1.21, "learning_rate": 3.460017876797422e-05, "logits/chosen": -2.6846604347229004, "logits/rejected": -2.771611213684082, "logps/chosen": -182.0282440185547, "logps/rejected": -241.62359619140625, "loss": 0.1928, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3348745107650757, "rewards/margins": 3.6126434803009033, "rewards/rejected": -4.947518348693848, "step": 921 }, { "epoch": 1.21, "learning_rate": 3.456708580912725e-05, "logits/chosen": -2.4890265464782715, "logits/rejected": -2.4979093074798584, "logps/chosen": -202.8487091064453, "logps/rejected": -264.9278564453125, "loss": 0.1011, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8148360848426819, "rewards/margins": 3.85543155670166, "rewards/rejected": -4.670267581939697, "step": 922 }, { "epoch": 1.21, "learning_rate": 3.453397319866557e-05, "logits/chosen": -2.672006607055664, "logits/rejected": -2.696829319000244, "logps/chosen": -230.58523559570312, "logps/rejected": -281.7519836425781, "loss": 0.2093, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6621661186218262, "rewards/margins": 3.0963659286499023, "rewards/rejected": -4.7585320472717285, "step": 923 }, { "epoch": 1.21, "learning_rate": 3.4500841004605324e-05, "logits/chosen": -2.795738697052002, "logits/rejected": -2.686603546142578, "logps/chosen": -207.413330078125, "logps/rejected": -196.3363494873047, "loss": 0.1551, "rewards/accuracies": 1.0, "rewards/chosen": -1.3747926950454712, "rewards/margins": 2.683774948120117, "rewards/rejected": -4.058568000793457, "step": 924 }, { "epoch": 1.21, "learning_rate": 3.446768929500288e-05, "logits/chosen": -2.398050546646118, "logits/rejected": -2.525655746459961, "logps/chosen": -146.66969299316406, "logps/rejected": -216.01690673828125, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3923161029815674, "rewards/margins": 3.224682092666626, "rewards/rejected": -4.616998195648193, "step": 925 }, { "epoch": 1.21, "learning_rate": 3.443451813795469e-05, "logits/chosen": -2.4953365325927734, "logits/rejected": -2.539217948913574, "logps/chosen": -178.64605712890625, "logps/rejected": -244.44332885742188, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": -0.8870552778244019, "rewards/margins": 4.003358840942383, "rewards/rejected": -4.890414237976074, "step": 926 }, { "epoch": 1.21, "learning_rate": 3.4401327601597174e-05, "logits/chosen": -2.5432627201080322, "logits/rejected": -2.676295757293701, "logps/chosen": -162.99037170410156, "logps/rejected": -223.5609893798828, "loss": 0.1389, "rewards/accuracies": 1.0, "rewards/chosen": -1.2814667224884033, "rewards/margins": 3.612804889678955, "rewards/rejected": -4.894271373748779, "step": 927 }, { "epoch": 1.21, "learning_rate": 3.436811775410651e-05, "logits/chosen": -2.607267141342163, "logits/rejected": -2.661623477935791, "logps/chosen": -182.5743408203125, "logps/rejected": -186.42269897460938, "loss": 0.3344, "rewards/accuracies": 0.875, "rewards/chosen": -1.5140290260314941, "rewards/margins": 2.287888526916504, "rewards/rejected": -3.801917314529419, "step": 928 }, { "epoch": 1.22, "learning_rate": 3.43348886636986e-05, "logits/chosen": -2.422396421432495, "logits/rejected": -2.606943130493164, "logps/chosen": -172.03494262695312, "logps/rejected": -237.93191528320312, "loss": 0.1241, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8440936803817749, "rewards/margins": 4.104944705963135, "rewards/rejected": -4.949038505554199, "step": 929 }, { "epoch": 1.22, "learning_rate": 3.430164039862882e-05, "logits/chosen": -2.6767523288726807, "logits/rejected": -2.6473617553710938, "logps/chosen": -182.46820068359375, "logps/rejected": -241.1190185546875, "loss": 0.1087, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0853631496429443, "rewards/margins": 4.447392463684082, "rewards/rejected": -5.5327558517456055, "step": 930 }, { "epoch": 1.22, "learning_rate": 3.426837302719197e-05, "logits/chosen": -2.4199132919311523, "logits/rejected": -2.390627384185791, "logps/chosen": -169.13812255859375, "logps/rejected": -212.91665649414062, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -1.5601575374603271, "rewards/margins": 4.0769805908203125, "rewards/rejected": -5.637138366699219, "step": 931 }, { "epoch": 1.22, "learning_rate": 3.42350866177221e-05, "logits/chosen": -2.603242874145508, "logits/rejected": -2.6754183769226074, "logps/chosen": -180.8822479248047, "logps/rejected": -224.43988037109375, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -1.2987481355667114, "rewards/margins": 2.230471611022949, "rewards/rejected": -3.52921986579895, "step": 932 }, { "epoch": 1.22, "learning_rate": 3.420178123859233e-05, "logits/chosen": -2.5300185680389404, "logits/rejected": -2.5311319828033447, "logps/chosen": -187.79489135742188, "logps/rejected": -245.074951171875, "loss": 0.1293, "rewards/accuracies": 0.9375, "rewards/chosen": -1.442630410194397, "rewards/margins": 4.003236293792725, "rewards/rejected": -5.445866107940674, "step": 933 }, { "epoch": 1.22, "learning_rate": 3.416845695821476e-05, "logits/chosen": -2.478121757507324, "logits/rejected": -2.551612377166748, "logps/chosen": -170.02886962890625, "logps/rejected": -266.62445068359375, "loss": 0.2426, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5788254737854004, "rewards/margins": 3.298884630203247, "rewards/rejected": -4.877709865570068, "step": 934 }, { "epoch": 1.22, "learning_rate": 3.413511384504034e-05, "logits/chosen": -2.541309356689453, "logits/rejected": -2.6793863773345947, "logps/chosen": -164.0281524658203, "logps/rejected": -277.7725830078125, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -1.379370927810669, "rewards/margins": 4.741353988647461, "rewards/rejected": -6.120724678039551, "step": 935 }, { "epoch": 1.23, "learning_rate": 3.410175196755866e-05, "logits/chosen": -2.5042457580566406, "logits/rejected": -2.5594520568847656, "logps/chosen": -231.91162109375, "logps/rejected": -235.10675048828125, "loss": 0.2673, "rewards/accuracies": 0.875, "rewards/chosen": -1.9761120080947876, "rewards/margins": 2.6676290035247803, "rewards/rejected": -4.643741130828857, "step": 936 }, { "epoch": 1.23, "learning_rate": 3.40683713942979e-05, "logits/chosen": -2.563133478164673, "logits/rejected": -2.469977378845215, "logps/chosen": -238.8995361328125, "logps/rejected": -281.9659118652344, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": -1.357677936553955, "rewards/margins": 4.857703685760498, "rewards/rejected": -6.215381622314453, "step": 937 }, { "epoch": 1.23, "learning_rate": 3.403497219382461e-05, "logits/chosen": -2.6389622688293457, "logits/rejected": -2.6753883361816406, "logps/chosen": -155.08253479003906, "logps/rejected": -225.48619079589844, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -1.296984314918518, "rewards/margins": 3.8950109481811523, "rewards/rejected": -5.191995143890381, "step": 938 }, { "epoch": 1.23, "learning_rate": 3.400155443474361e-05, "logits/chosen": -2.5711069107055664, "logits/rejected": -2.539632558822632, "logps/chosen": -297.8182067871094, "logps/rejected": -333.8896484375, "loss": 0.1312, "rewards/accuracies": 1.0, "rewards/chosen": -2.230386257171631, "rewards/margins": 4.170665740966797, "rewards/rejected": -6.401051998138428, "step": 939 }, { "epoch": 1.23, "learning_rate": 3.396811818569785e-05, "logits/chosen": -2.684633731842041, "logits/rejected": -2.71695876121521, "logps/chosen": -240.3818359375, "logps/rejected": -274.24285888671875, "loss": 0.1441, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5472981929779053, "rewards/margins": 3.971374750137329, "rewards/rejected": -5.518672466278076, "step": 940 }, { "epoch": 1.23, "learning_rate": 3.3934663515368236e-05, "logits/chosen": -2.6104092597961426, "logits/rejected": -2.594395637512207, "logps/chosen": -194.75103759765625, "logps/rejected": -240.4493408203125, "loss": 0.1974, "rewards/accuracies": 0.875, "rewards/chosen": -1.7447320222854614, "rewards/margins": 3.628329277038574, "rewards/rejected": -5.373061656951904, "step": 941 }, { "epoch": 1.23, "learning_rate": 3.3901190492473554e-05, "logits/chosen": -2.6302521228790283, "logits/rejected": -2.7227888107299805, "logps/chosen": -170.1224365234375, "logps/rejected": -237.1372528076172, "loss": 0.1896, "rewards/accuracies": 0.9375, "rewards/chosen": -1.631825566291809, "rewards/margins": 2.9237256050109863, "rewards/rejected": -4.555551052093506, "step": 942 }, { "epoch": 1.23, "learning_rate": 3.3867699185770255e-05, "logits/chosen": -2.7121634483337402, "logits/rejected": -2.681286573410034, "logps/chosen": -186.2149200439453, "logps/rejected": -199.51651000976562, "loss": 0.152, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9240370988845825, "rewards/margins": 2.6966371536254883, "rewards/rejected": -4.620674133300781, "step": 943 }, { "epoch": 1.24, "learning_rate": 3.383418966405234e-05, "logits/chosen": -2.650979995727539, "logits/rejected": -2.7104907035827637, "logps/chosen": -201.94180297851562, "logps/rejected": -240.47752380371094, "loss": 0.2521, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5346249341964722, "rewards/margins": 3.591853618621826, "rewards/rejected": -5.126478672027588, "step": 944 }, { "epoch": 1.24, "learning_rate": 3.3800661996151264e-05, "logits/chosen": -2.4865236282348633, "logits/rejected": -2.513885736465454, "logps/chosen": -193.3810577392578, "logps/rejected": -244.85964965820312, "loss": 0.1085, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4318909645080566, "rewards/margins": 4.656887054443359, "rewards/rejected": -6.088778018951416, "step": 945 }, { "epoch": 1.24, "learning_rate": 3.376711625093571e-05, "logits/chosen": -2.6373162269592285, "logits/rejected": -2.581425905227661, "logps/chosen": -169.510009765625, "logps/rejected": -241.111572265625, "loss": 0.13, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7232588529586792, "rewards/margins": 4.364504337310791, "rewards/rejected": -6.08776330947876, "step": 946 }, { "epoch": 1.24, "learning_rate": 3.373355249731153e-05, "logits/chosen": -2.6107871532440186, "logits/rejected": -2.5420989990234375, "logps/chosen": -181.519775390625, "logps/rejected": -240.91998291015625, "loss": 0.2422, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9072514772415161, "rewards/margins": 3.571643352508545, "rewards/rejected": -5.47889518737793, "step": 947 }, { "epoch": 1.24, "learning_rate": 3.369997080422155e-05, "logits/chosen": -2.550039768218994, "logits/rejected": -2.630925178527832, "logps/chosen": -230.58935546875, "logps/rejected": -251.55810546875, "loss": 0.1636, "rewards/accuracies": 1.0, "rewards/chosen": -2.60187029838562, "rewards/margins": 3.3375368118286133, "rewards/rejected": -5.939406871795654, "step": 948 }, { "epoch": 1.24, "learning_rate": 3.366637124064544e-05, "logits/chosen": -2.6128439903259277, "logits/rejected": -2.619659185409546, "logps/chosen": -160.36740112304688, "logps/rejected": -200.9753875732422, "loss": 0.1855, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7813210487365723, "rewards/margins": 2.907754898071289, "rewards/rejected": -4.689075946807861, "step": 949 }, { "epoch": 1.24, "learning_rate": 3.36327538755996e-05, "logits/chosen": -2.562918186187744, "logits/rejected": -2.6631853580474854, "logps/chosen": -201.80294799804688, "logps/rejected": -263.9149475097656, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": -1.5491386651992798, "rewards/margins": 4.151284217834473, "rewards/rejected": -5.700422286987305, "step": 950 }, { "epoch": 1.24, "learning_rate": 3.3599118778136965e-05, "logits/chosen": -2.6970577239990234, "logits/rejected": -2.7352042198181152, "logps/chosen": -186.6042938232422, "logps/rejected": -264.8851318359375, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": -2.238983631134033, "rewards/margins": 3.6914262771606445, "rewards/rejected": -5.930410385131836, "step": 951 }, { "epoch": 1.25, "learning_rate": 3.356546601734692e-05, "logits/chosen": -2.624141216278076, "logits/rejected": -2.6024389266967773, "logps/chosen": -169.18344116210938, "logps/rejected": -194.87644958496094, "loss": 0.2061, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8763582706451416, "rewards/margins": 3.1667044162750244, "rewards/rejected": -5.043062686920166, "step": 952 }, { "epoch": 1.25, "learning_rate": 3.3531795662355115e-05, "logits/chosen": -2.6674022674560547, "logits/rejected": -2.7814764976501465, "logps/chosen": -169.5562286376953, "logps/rejected": -216.2843475341797, "loss": 0.1286, "rewards/accuracies": 1.0, "rewards/chosen": -1.6532480716705322, "rewards/margins": 3.105289936065674, "rewards/rejected": -4.758538246154785, "step": 953 }, { "epoch": 1.25, "learning_rate": 3.349810778232335e-05, "logits/chosen": -2.4326975345611572, "logits/rejected": -2.509242534637451, "logps/chosen": -186.85870361328125, "logps/rejected": -209.04522705078125, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": -1.6442396640777588, "rewards/margins": 3.101438522338867, "rewards/rejected": -4.745677947998047, "step": 954 }, { "epoch": 1.25, "learning_rate": 3.346440244644942e-05, "logits/chosen": -2.597878932952881, "logits/rejected": -2.6052956581115723, "logps/chosen": -194.63372802734375, "logps/rejected": -213.82635498046875, "loss": 0.2907, "rewards/accuracies": 0.8125, "rewards/chosen": -1.813546895980835, "rewards/margins": 2.6921236515045166, "rewards/rejected": -4.505670547485352, "step": 955 }, { "epoch": 1.25, "learning_rate": 3.3430679723966976e-05, "logits/chosen": -2.5130550861358643, "logits/rejected": -2.5555241107940674, "logps/chosen": -144.39450073242188, "logps/rejected": -201.18093872070312, "loss": 0.26, "rewards/accuracies": 0.875, "rewards/chosen": -1.6298218965530396, "rewards/margins": 2.9413676261901855, "rewards/rejected": -4.5711894035339355, "step": 956 }, { "epoch": 1.25, "learning_rate": 3.339693968414538e-05, "logits/chosen": -2.536893844604492, "logits/rejected": -2.672922372817993, "logps/chosen": -144.6156768798828, "logps/rejected": -195.33729553222656, "loss": 0.283, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9383922815322876, "rewards/margins": 2.168663501739502, "rewards/rejected": -4.1070556640625, "step": 957 }, { "epoch": 1.25, "learning_rate": 3.336318239628956e-05, "logits/chosen": -2.6276655197143555, "logits/rejected": -2.6729187965393066, "logps/chosen": -169.65625, "logps/rejected": -203.56922912597656, "loss": 0.1577, "rewards/accuracies": 0.9375, "rewards/chosen": -1.270064115524292, "rewards/margins": 3.2413148880004883, "rewards/rejected": -4.511379241943359, "step": 958 }, { "epoch": 1.26, "learning_rate": 3.3329407929739906e-05, "logits/chosen": -2.708746910095215, "logits/rejected": -2.751272201538086, "logps/chosen": -200.1501922607422, "logps/rejected": -251.32875061035156, "loss": 0.197, "rewards/accuracies": 0.875, "rewards/chosen": -1.698577880859375, "rewards/margins": 3.6546480655670166, "rewards/rejected": -5.3532257080078125, "step": 959 }, { "epoch": 1.26, "learning_rate": 3.3295616353872026e-05, "logits/chosen": -2.5632784366607666, "logits/rejected": -2.617943048477173, "logps/chosen": -173.5696258544922, "logps/rejected": -198.16314697265625, "loss": 0.2387, "rewards/accuracies": 0.875, "rewards/chosen": -1.636803388595581, "rewards/margins": 2.631190299987793, "rewards/rejected": -4.267993450164795, "step": 960 }, { "epoch": 1.26, "learning_rate": 3.326180773809676e-05, "logits/chosen": -2.4898295402526855, "logits/rejected": -2.5250651836395264, "logps/chosen": -193.2848663330078, "logps/rejected": -234.1524658203125, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": -1.6962850093841553, "rewards/margins": 3.261317253112793, "rewards/rejected": -4.957602500915527, "step": 961 }, { "epoch": 1.26, "learning_rate": 3.3227982151859873e-05, "logits/chosen": -2.6020333766937256, "logits/rejected": -2.590635299682617, "logps/chosen": -200.148681640625, "logps/rejected": -242.25833129882812, "loss": 0.1046, "rewards/accuracies": 0.9375, "rewards/chosen": -2.01560640335083, "rewards/margins": 4.58357572555542, "rewards/rejected": -6.59918212890625, "step": 962 }, { "epoch": 1.26, "learning_rate": 3.3194139664642035e-05, "logits/chosen": -2.5765419006347656, "logits/rejected": -2.623060464859009, "logps/chosen": -206.80001831054688, "logps/rejected": -253.6566162109375, "loss": 0.123, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8534865379333496, "rewards/margins": 3.36710786819458, "rewards/rejected": -5.22059440612793, "step": 963 }, { "epoch": 1.26, "learning_rate": 3.3160280345958614e-05, "logits/chosen": -2.706054449081421, "logits/rejected": -2.7712466716766357, "logps/chosen": -218.05075073242188, "logps/rejected": -267.1859436035156, "loss": 0.0725, "rewards/accuracies": 1.0, "rewards/chosen": -1.8123269081115723, "rewards/margins": 3.9265780448913574, "rewards/rejected": -5.738905429840088, "step": 964 }, { "epoch": 1.26, "learning_rate": 3.3126404265359545e-05, "logits/chosen": -2.6757712364196777, "logits/rejected": -2.7458887100219727, "logps/chosen": -201.00808715820312, "logps/rejected": -240.7850341796875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -1.6226627826690674, "rewards/margins": 4.559228897094727, "rewards/rejected": -6.181891918182373, "step": 965 }, { "epoch": 1.26, "learning_rate": 3.3092511492429216e-05, "logits/chosen": -2.644242763519287, "logits/rejected": -2.695011615753174, "logps/chosen": -162.74783325195312, "logps/rejected": -192.76516723632812, "loss": 0.1974, "rewards/accuracies": 0.875, "rewards/chosen": -2.1141974925994873, "rewards/margins": 2.7216439247131348, "rewards/rejected": -4.835841178894043, "step": 966 }, { "epoch": 1.27, "learning_rate": 3.305860209678628e-05, "logits/chosen": -2.4524073600769043, "logits/rejected": -2.549844264984131, "logps/chosen": -194.18768310546875, "logps/rejected": -271.9709167480469, "loss": 0.2353, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8129292726516724, "rewards/margins": 3.5435562133789062, "rewards/rejected": -5.356485843658447, "step": 967 }, { "epoch": 1.27, "learning_rate": 3.3024676148083555e-05, "logits/chosen": -2.518044948577881, "logits/rejected": -2.5665040016174316, "logps/chosen": -208.9219970703125, "logps/rejected": -242.92112731933594, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -1.6226799488067627, "rewards/margins": 4.56795597076416, "rewards/rejected": -6.190636157989502, "step": 968 }, { "epoch": 1.27, "learning_rate": 3.299073371600784e-05, "logits/chosen": -2.580070972442627, "logits/rejected": -2.726609945297241, "logps/chosen": -179.52996826171875, "logps/rejected": -268.6859130859375, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": -2.1363651752471924, "rewards/margins": 3.852691173553467, "rewards/rejected": -5.989056587219238, "step": 969 }, { "epoch": 1.27, "learning_rate": 3.29567748702798e-05, "logits/chosen": -2.806037425994873, "logits/rejected": -2.7987043857574463, "logps/chosen": -192.37161254882812, "logps/rejected": -251.89358520507812, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -1.6633442640304565, "rewards/margins": 3.375584602355957, "rewards/rejected": -5.038928985595703, "step": 970 }, { "epoch": 1.27, "learning_rate": 3.2922799680653816e-05, "logits/chosen": -2.572334051132202, "logits/rejected": -2.6026675701141357, "logps/chosen": -241.74142456054688, "logps/rejected": -274.17462158203125, "loss": 0.2632, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3693084716796875, "rewards/margins": 2.436305522918701, "rewards/rejected": -4.805613994598389, "step": 971 }, { "epoch": 1.27, "learning_rate": 3.288880821691785e-05, "logits/chosen": -2.696925640106201, "logits/rejected": -2.7146036624908447, "logps/chosen": -237.42230224609375, "logps/rejected": -229.28921508789062, "loss": 0.1131, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2252135276794434, "rewards/margins": 3.5753629207611084, "rewards/rejected": -5.800576686859131, "step": 972 }, { "epoch": 1.27, "learning_rate": 3.285480054889327e-05, "logits/chosen": -2.474309206008911, "logits/rejected": -2.537996292114258, "logps/chosen": -221.17630004882812, "logps/rejected": -248.78753662109375, "loss": 0.1531, "rewards/accuracies": 0.875, "rewards/chosen": -2.4152324199676514, "rewards/margins": 3.2296578884124756, "rewards/rejected": -5.644889831542969, "step": 973 }, { "epoch": 1.27, "learning_rate": 3.2820776746434764e-05, "logits/chosen": -2.652519941329956, "logits/rejected": -2.750889778137207, "logps/chosen": -247.38218688964844, "logps/rejected": -280.9444274902344, "loss": 0.2395, "rewards/accuracies": 0.8125, "rewards/chosen": -2.102778911590576, "rewards/margins": 2.95568585395813, "rewards/rejected": -5.058465003967285, "step": 974 }, { "epoch": 1.28, "learning_rate": 3.278673687943011e-05, "logits/chosen": -2.430567979812622, "logits/rejected": -2.61224365234375, "logps/chosen": -188.28207397460938, "logps/rejected": -259.6468811035156, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": -1.8571250438690186, "rewards/margins": 4.498969554901123, "rewards/rejected": -6.3560943603515625, "step": 975 }, { "epoch": 1.28, "learning_rate": 3.2752681017800144e-05, "logits/chosen": -2.642226457595825, "logits/rejected": -2.6351287364959717, "logps/chosen": -179.3868865966797, "logps/rejected": -236.36964416503906, "loss": 0.1584, "rewards/accuracies": 0.875, "rewards/chosen": -2.0731639862060547, "rewards/margins": 3.906996965408325, "rewards/rejected": -5.980160713195801, "step": 976 }, { "epoch": 1.28, "learning_rate": 3.27186092314985e-05, "logits/chosen": -2.643181562423706, "logits/rejected": -2.6815311908721924, "logps/chosen": -188.3582000732422, "logps/rejected": -238.82688903808594, "loss": 0.1769, "rewards/accuracies": 0.875, "rewards/chosen": -2.3766210079193115, "rewards/margins": 3.4544308185577393, "rewards/rejected": -5.831052303314209, "step": 977 }, { "epoch": 1.28, "learning_rate": 3.2684521590511566e-05, "logits/chosen": -2.5185253620147705, "logits/rejected": -2.6350820064544678, "logps/chosen": -212.4395294189453, "logps/rejected": -258.64971923828125, "loss": 0.0798, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6726995706558228, "rewards/margins": 4.015776634216309, "rewards/rejected": -5.688476085662842, "step": 978 }, { "epoch": 1.28, "learning_rate": 3.2650418164858284e-05, "logits/chosen": -2.552361011505127, "logits/rejected": -2.6091713905334473, "logps/chosen": -183.578857421875, "logps/rejected": -226.1600341796875, "loss": 0.1787, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2989511489868164, "rewards/margins": 4.482161521911621, "rewards/rejected": -6.781111717224121, "step": 979 }, { "epoch": 1.28, "learning_rate": 3.261629902459e-05, "logits/chosen": -2.7338201999664307, "logits/rejected": -2.632646322250366, "logps/chosen": -247.8429718017578, "logps/rejected": -241.37171936035156, "loss": 0.1638, "rewards/accuracies": 0.875, "rewards/chosen": -2.2429699897766113, "rewards/margins": 3.6616616249084473, "rewards/rejected": -5.904631614685059, "step": 980 }, { "epoch": 1.28, "learning_rate": 3.258216423979037e-05, "logits/chosen": -2.3317790031433105, "logits/rejected": -2.4023687839508057, "logps/chosen": -152.5063934326172, "logps/rejected": -252.93801879882812, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": -2.337461471557617, "rewards/margins": 3.838200569152832, "rewards/rejected": -6.175662040710449, "step": 981 }, { "epoch": 1.29, "learning_rate": 3.254801388057514e-05, "logits/chosen": -2.5534322261810303, "logits/rejected": -2.6023335456848145, "logps/chosen": -219.31874084472656, "logps/rejected": -287.8863220214844, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -3.125908613204956, "rewards/margins": 4.897000312805176, "rewards/rejected": -8.022909164428711, "step": 982 }, { "epoch": 1.29, "learning_rate": 3.2513848017092113e-05, "logits/chosen": -2.5682573318481445, "logits/rejected": -2.605602741241455, "logps/chosen": -201.1256103515625, "logps/rejected": -251.9600067138672, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -1.9630330801010132, "rewards/margins": 4.16124963760376, "rewards/rejected": -6.1242828369140625, "step": 983 }, { "epoch": 1.29, "learning_rate": 3.2479666719520886e-05, "logits/chosen": -2.4335856437683105, "logits/rejected": -2.495433807373047, "logps/chosen": -217.77142333984375, "logps/rejected": -269.6983947753906, "loss": 0.1568, "rewards/accuracies": 0.875, "rewards/chosen": -2.553312063217163, "rewards/margins": 4.387064456939697, "rewards/rejected": -6.940376281738281, "step": 984 }, { "epoch": 1.29, "learning_rate": 3.2445470058072766e-05, "logits/chosen": -2.477215051651001, "logits/rejected": -2.4980826377868652, "logps/chosen": -175.28768920898438, "logps/rejected": -211.9522247314453, "loss": 0.2317, "rewards/accuracies": 0.8125, "rewards/chosen": -2.883258104324341, "rewards/margins": 3.288342237472534, "rewards/rejected": -6.171600341796875, "step": 985 }, { "epoch": 1.29, "learning_rate": 3.2411258102990646e-05, "logits/chosen": -2.698887348175049, "logits/rejected": -2.8316826820373535, "logps/chosen": -198.59188842773438, "logps/rejected": -265.5428161621094, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -2.087355613708496, "rewards/margins": 4.889158248901367, "rewards/rejected": -6.976513385772705, "step": 986 }, { "epoch": 1.29, "learning_rate": 3.23770309245488e-05, "logits/chosen": -2.6792445182800293, "logits/rejected": -2.610353469848633, "logps/chosen": -186.60247802734375, "logps/rejected": -213.04379272460938, "loss": 0.1424, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9458187818527222, "rewards/margins": 3.5154922008514404, "rewards/rejected": -5.461311340332031, "step": 987 }, { "epoch": 1.29, "learning_rate": 3.23427885930528e-05, "logits/chosen": -2.5607926845550537, "logits/rejected": -2.6190388202667236, "logps/chosen": -203.35629272460938, "logps/rejected": -276.7921142578125, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": -2.604539632797241, "rewards/margins": 4.186058521270752, "rewards/rejected": -6.790597915649414, "step": 988 }, { "epoch": 1.29, "learning_rate": 3.230853117883933e-05, "logits/chosen": -2.3824515342712402, "logits/rejected": -2.374831438064575, "logps/chosen": -180.39971923828125, "logps/rejected": -229.66053771972656, "loss": 0.0811, "rewards/accuracies": 1.0, "rewards/chosen": -2.316575527191162, "rewards/margins": 4.488853454589844, "rewards/rejected": -6.805428981781006, "step": 989 }, { "epoch": 1.3, "learning_rate": 3.227425875227605e-05, "logits/chosen": -2.517157793045044, "logits/rejected": -2.5897037982940674, "logps/chosen": -228.96835327148438, "logps/rejected": -249.2227325439453, "loss": 0.0852, "rewards/accuracies": 0.9375, "rewards/chosen": -2.941978693008423, "rewards/margins": 3.9116504192352295, "rewards/rejected": -6.8536295890808105, "step": 990 }, { "epoch": 1.3, "learning_rate": 3.223997138376146e-05, "logits/chosen": -2.5914738178253174, "logits/rejected": -2.5838983058929443, "logps/chosen": -221.4722900390625, "logps/rejected": -228.54957580566406, "loss": 0.1337, "rewards/accuracies": 0.9375, "rewards/chosen": -3.404142379760742, "rewards/margins": 3.357062816619873, "rewards/rejected": -6.761204719543457, "step": 991 }, { "epoch": 1.3, "learning_rate": 3.220566914372477e-05, "logits/chosen": -2.6407084465026855, "logits/rejected": -2.6386706829071045, "logps/chosen": -253.16580200195312, "logps/rejected": -330.81640625, "loss": 0.08, "rewards/accuracies": 0.9375, "rewards/chosen": -2.206897735595703, "rewards/margins": 4.39909553527832, "rewards/rejected": -6.605993270874023, "step": 992 }, { "epoch": 1.3, "learning_rate": 3.2171352102625716e-05, "logits/chosen": -2.722573757171631, "logits/rejected": -2.6421186923980713, "logps/chosen": -250.5634002685547, "logps/rejected": -265.2950439453125, "loss": 0.1562, "rewards/accuracies": 0.9375, "rewards/chosen": -3.815258741378784, "rewards/margins": 3.4340462684631348, "rewards/rejected": -7.249305248260498, "step": 993 }, { "epoch": 1.3, "learning_rate": 3.213702033095444e-05, "logits/chosen": -2.5136289596557617, "logits/rejected": -2.5164742469787598, "logps/chosen": -194.22906494140625, "logps/rejected": -242.98043823242188, "loss": 0.1075, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7019946575164795, "rewards/margins": 4.0128560066223145, "rewards/rejected": -6.714850902557373, "step": 994 }, { "epoch": 1.3, "learning_rate": 3.210267389923135e-05, "logits/chosen": -2.2657485008239746, "logits/rejected": -2.3381145000457764, "logps/chosen": -211.24606323242188, "logps/rejected": -275.7134094238281, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -2.5696375370025635, "rewards/margins": 4.8196611404418945, "rewards/rejected": -7.389298439025879, "step": 995 }, { "epoch": 1.3, "learning_rate": 3.2068312878006955e-05, "logits/chosen": -2.5042364597320557, "logits/rejected": -2.577590227127075, "logps/chosen": -224.34481811523438, "logps/rejected": -294.0433044433594, "loss": 0.1099, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5805823802948, "rewards/margins": 4.027166843414307, "rewards/rejected": -6.607748985290527, "step": 996 }, { "epoch": 1.3, "learning_rate": 3.2033937337861744e-05, "logits/chosen": -2.566772222518921, "logits/rejected": -2.635631561279297, "logps/chosen": -182.17617797851562, "logps/rejected": -221.6566925048828, "loss": 0.1608, "rewards/accuracies": 1.0, "rewards/chosen": -2.865788221359253, "rewards/margins": 4.04500675201416, "rewards/rejected": -6.910794734954834, "step": 997 }, { "epoch": 1.31, "learning_rate": 3.199954734940603e-05, "logits/chosen": -2.416757106781006, "logits/rejected": -2.4752490520477295, "logps/chosen": -227.98826599121094, "logps/rejected": -235.98541259765625, "loss": 0.0961, "rewards/accuracies": 1.0, "rewards/chosen": -3.63022780418396, "rewards/margins": 3.34237003326416, "rewards/rejected": -6.972598075866699, "step": 998 }, { "epoch": 1.31, "learning_rate": 3.196514298327979e-05, "logits/chosen": -2.511079788208008, "logits/rejected": -2.4900567531585693, "logps/chosen": -203.03321838378906, "logps/rejected": -313.2828369140625, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": -3.127234935760498, "rewards/margins": 4.497407913208008, "rewards/rejected": -7.624642848968506, "step": 999 }, { "epoch": 1.31, "learning_rate": 3.193072431015254e-05, "logits/chosen": -2.6488115787506104, "logits/rejected": -2.532362937927246, "logps/chosen": -202.8871612548828, "logps/rejected": -230.8666534423828, "loss": 0.2179, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9239931106567383, "rewards/margins": 3.898569345474243, "rewards/rejected": -6.822562217712402, "step": 1000 } ], "logging_steps": 1, "max_steps": 2292, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }