{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998451213216314, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.41489839553833, "logits/rejected": -2.313730239868164, "logps/chosen": -426.6319580078125, "logps/rejected": -209.72433471679688, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.736762762069702, "logits/rejected": -2.701097249984741, "logps/chosen": -275.3837890625, "logps/rejected": -267.7837219238281, "loss": 0.696, "rewards/accuracies": 0.4236111044883728, "rewards/chosen": -0.0019075096352025867, "rewards/margins": -0.0022293850779533386, "rewards/rejected": 0.00032187564647756517, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.6771740913391113, "logits/rejected": -2.614168643951416, "logps/chosen": -282.4807434082031, "logps/rejected": -230.01553344726562, "loss": 0.6902, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.00013565353583544493, "rewards/margins": 0.002631585579365492, "rewards/rejected": -0.0024959323927760124, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.670668363571167, "logits/rejected": -2.628671169281006, "logps/chosen": -254.06240844726562, "logps/rejected": -235.0716094970703, "loss": 0.6828, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0025966805405914783, "rewards/margins": 0.00959115568548441, "rewards/rejected": -0.006994475610554218, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.7003836631774902, "logits/rejected": -2.615793228149414, "logps/chosen": -238.7230682373047, "logps/rejected": -180.0827178955078, "loss": 0.6629, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.025214945897459984, "rewards/margins": 0.05416213348507881, "rewards/rejected": -0.02894718386232853, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.6934826374053955, "logits/rejected": -2.6144022941589355, "logps/chosen": -251.5126495361328, "logps/rejected": -197.26048278808594, "loss": 0.6399, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.046762533485889435, "rewards/margins": 0.12699802219867706, "rewards/rejected": -0.08023548126220703, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.692603349685669, "logits/rejected": -2.6785025596618652, "logps/chosen": -257.05914306640625, "logps/rejected": -247.1437530517578, "loss": 0.6155, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.04687836393713951, "rewards/margins": 0.17602364718914032, "rewards/rejected": -0.1291452944278717, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.737905979156494, "logits/rejected": -2.623173952102661, "logps/chosen": -316.3946228027344, "logps/rejected": -238.3878936767578, "loss": 0.5916, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.10290606319904327, "rewards/margins": 0.3630576729774475, "rewards/rejected": -0.26015162467956543, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.6619038581848145, "logits/rejected": -2.6211159229278564, "logps/chosen": -279.396728515625, "logps/rejected": -232.0877685546875, "loss": 0.5671, "rewards/accuracies": 0.78125, "rewards/chosen": 0.05718129128217697, "rewards/margins": 0.43765443563461304, "rewards/rejected": -0.38047313690185547, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.6507420539855957, "logits/rejected": -2.575387954711914, "logps/chosen": -272.7942199707031, "logps/rejected": -227.7272491455078, "loss": 0.5676, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.008855322375893593, "rewards/margins": 0.3766869902610779, "rewards/rejected": -0.3855423331260681, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.6757609844207764, "logits/rejected": -2.5984952449798584, "logps/chosen": -278.7255554199219, "logps/rejected": -253.88668823242188, "loss": 0.575, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.0012895159889012575, "rewards/margins": 0.500220775604248, "rewards/rejected": -0.49893131852149963, "step": 100 }, { "epoch": 0.1, "eval_logits/chosen": -2.745777130126953, "eval_logits/rejected": -2.6524808406829834, "eval_logps/chosen": -273.94866943359375, "eval_logps/rejected": -235.7017822265625, "eval_loss": 0.5309013724327087, "eval_rewards/accuracies": 0.7459999918937683, "eval_rewards/chosen": -0.01014842838048935, "eval_rewards/margins": 0.593262791633606, "eval_rewards/rejected": -0.6034111976623535, "eval_runtime": 330.0041, "eval_samples_per_second": 6.061, "eval_steps_per_second": 0.379, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.731081485748291, "logits/rejected": -2.6355175971984863, "logps/chosen": -276.24908447265625, "logps/rejected": -247.2675018310547, "loss": 0.5464, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.021220406517386436, "rewards/margins": 0.625217854976654, "rewards/rejected": -0.6464383006095886, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.715306520462036, "logits/rejected": -2.6525187492370605, "logps/chosen": -274.8995361328125, "logps/rejected": -230.00900268554688, "loss": 0.5043, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.022734543308615685, "rewards/margins": 0.7808176279067993, "rewards/rejected": -0.8035521507263184, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.720980405807495, "logits/rejected": -2.6762051582336426, "logps/chosen": -258.5883483886719, "logps/rejected": -226.6556396484375, "loss": 0.5365, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.060701239854097366, "rewards/margins": 0.8721252679824829, "rewards/rejected": -0.9328263998031616, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.7193636894226074, "logits/rejected": -2.6467044353485107, "logps/chosen": -283.06817626953125, "logps/rejected": -243.6209716796875, "loss": 0.5378, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.13080577552318573, "rewards/margins": 0.6716828942298889, "rewards/rejected": -0.8024886250495911, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.7493207454681396, "logits/rejected": -2.6163530349731445, "logps/chosen": -282.48358154296875, "logps/rejected": -222.87710571289062, "loss": 0.522, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.12233565002679825, "rewards/margins": 0.6884902119636536, "rewards/rejected": -0.8108257055282593, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.7176194190979004, "logits/rejected": -2.6076390743255615, "logps/chosen": -283.25506591796875, "logps/rejected": -229.7573699951172, "loss": 0.5103, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0403318852186203, "rewards/margins": 0.8281108140945435, "rewards/rejected": -0.868442714214325, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.715559482574463, "logits/rejected": -2.630178689956665, "logps/chosen": -270.50274658203125, "logps/rejected": -247.9048614501953, "loss": 0.5238, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.018013525754213333, "rewards/margins": 0.9226928949356079, "rewards/rejected": -0.9407063722610474, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.668119430541992, "logits/rejected": -2.601747989654541, "logps/chosen": -248.54458618164062, "logps/rejected": -225.4590606689453, "loss": 0.4972, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2065100222826004, "rewards/margins": 0.7541019320487976, "rewards/rejected": -0.9606119394302368, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.6944541931152344, "logits/rejected": -2.607551097869873, "logps/chosen": -268.26678466796875, "logps/rejected": -233.5041961669922, "loss": 0.498, "rewards/accuracies": 0.75, "rewards/chosen": -0.1957332193851471, "rewards/margins": 0.9389954805374146, "rewards/rejected": -1.1347286701202393, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -2.779195785522461, "logits/rejected": -2.6563549041748047, "logps/chosen": -318.3896484375, "logps/rejected": -265.0843505859375, "loss": 0.4759, "rewards/accuracies": 0.78125, "rewards/chosen": -0.09323607385158539, "rewards/margins": 1.1875323057174683, "rewards/rejected": -1.2807685136795044, "step": 200 }, { "epoch": 0.21, "eval_logits/chosen": -2.800610303878784, "eval_logits/rejected": -2.7065932750701904, "eval_logps/chosen": -274.48919677734375, "eval_logps/rejected": -240.49658203125, "eval_loss": 0.49432528018951416, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.06420008093118668, "eval_rewards/margins": 1.0186885595321655, "eval_rewards/rejected": -1.0828887224197388, "eval_runtime": 330.0896, "eval_samples_per_second": 6.059, "eval_steps_per_second": 0.379, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.7947614192962646, "logits/rejected": -2.7344536781311035, "logps/chosen": -279.47857666015625, "logps/rejected": -244.9560546875, "loss": 0.5212, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10790084302425385, "rewards/margins": 0.8937327265739441, "rewards/rejected": -1.0016335248947144, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.730440855026245, "logits/rejected": -2.670431613922119, "logps/chosen": -239.8012237548828, "logps/rejected": -226.3561248779297, "loss": 0.4978, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10349278151988983, "rewards/margins": 1.0418349504470825, "rewards/rejected": -1.1453276872634888, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.798637866973877, "logits/rejected": -2.733868360519409, "logps/chosen": -326.19427490234375, "logps/rejected": -257.0804138183594, "loss": 0.5144, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.14514626562595367, "rewards/margins": 1.034011960029602, "rewards/rejected": -1.1791582107543945, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.7531001567840576, "logits/rejected": -2.6891307830810547, "logps/chosen": -270.3514404296875, "logps/rejected": -258.6917724609375, "loss": 0.4934, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.23920175433158875, "rewards/margins": 1.1313306093215942, "rewards/rejected": -1.3705322742462158, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.6519603729248047, "logits/rejected": -2.569204807281494, "logps/chosen": -292.78662109375, "logps/rejected": -245.4613800048828, "loss": 0.5315, "rewards/accuracies": 0.75, "rewards/chosen": -0.37750571966171265, "rewards/margins": 0.8689467310905457, "rewards/rejected": -1.2464525699615479, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.683892011642456, "logits/rejected": -2.599489450454712, "logps/chosen": -291.04229736328125, "logps/rejected": -281.9589538574219, "loss": 0.4881, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4090940058231354, "rewards/margins": 1.0680254697799683, "rewards/rejected": -1.4771194458007812, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.6438565254211426, "logits/rejected": -2.536273241043091, "logps/chosen": -297.5185241699219, "logps/rejected": -231.0972137451172, "loss": 0.4995, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.3678213357925415, "rewards/margins": 0.9588603973388672, "rewards/rejected": -1.3266817331314087, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.6681864261627197, "logits/rejected": -2.6065673828125, "logps/chosen": -239.76901245117188, "logps/rejected": -237.3007354736328, "loss": 0.5027, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.25205284357070923, "rewards/margins": 1.061631679534912, "rewards/rejected": -1.3136845827102661, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.6735098361968994, "logits/rejected": -2.5926923751831055, "logps/chosen": -266.5799255371094, "logps/rejected": -243.5729217529297, "loss": 0.4921, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22308111190795898, "rewards/margins": 1.2045137882232666, "rewards/rejected": -1.4275949001312256, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -2.7693233489990234, "logits/rejected": -2.703303575515747, "logps/chosen": -270.84503173828125, "logps/rejected": -263.55548095703125, "loss": 0.5022, "rewards/accuracies": 0.78125, "rewards/chosen": -0.21006159484386444, "rewards/margins": 1.251680850982666, "rewards/rejected": -1.4617425203323364, "step": 300 }, { "epoch": 0.31, "eval_logits/chosen": -2.8225364685058594, "eval_logits/rejected": -2.7362353801727295, "eval_logps/chosen": -275.37347412109375, "eval_logps/rejected": -242.1844940185547, "eval_loss": 0.4824218451976776, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -0.15262925624847412, "eval_rewards/margins": 1.0990517139434814, "eval_rewards/rejected": -1.2516810894012451, "eval_runtime": 329.0169, "eval_samples_per_second": 6.079, "eval_steps_per_second": 0.38, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -2.657869815826416, "logits/rejected": -2.604085683822632, "logps/chosen": -269.7090759277344, "logps/rejected": -248.0914306640625, "loss": 0.4821, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2833268642425537, "rewards/margins": 1.0022751092910767, "rewards/rejected": -1.2856018543243408, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -2.753511905670166, "logits/rejected": -2.669100284576416, "logps/chosen": -282.27386474609375, "logps/rejected": -246.5732879638672, "loss": 0.4972, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.3067387044429779, "rewards/margins": 1.3396486043930054, "rewards/rejected": -1.6463874578475952, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -2.638120412826538, "logits/rejected": -2.5762674808502197, "logps/chosen": -251.3295135498047, "logps/rejected": -223.2657470703125, "loss": 0.4817, "rewards/accuracies": 0.78125, "rewards/chosen": -0.36169368028640747, "rewards/margins": 1.3353826999664307, "rewards/rejected": -1.697076439857483, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -2.7591946125030518, "logits/rejected": -2.647918939590454, "logps/chosen": -253.46444702148438, "logps/rejected": -215.0161590576172, "loss": 0.4821, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.3391885757446289, "rewards/margins": 1.2555291652679443, "rewards/rejected": -1.5947177410125732, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -2.755331039428711, "logits/rejected": -2.664771556854248, "logps/chosen": -278.7284851074219, "logps/rejected": -265.8031311035156, "loss": 0.5166, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.29750093817710876, "rewards/margins": 1.3157111406326294, "rewards/rejected": -1.61321222782135, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -2.7414841651916504, "logits/rejected": -2.649625778198242, "logps/chosen": -289.3199768066406, "logps/rejected": -261.6228942871094, "loss": 0.507, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4183623790740967, "rewards/margins": 1.3235833644866943, "rewards/rejected": -1.7419458627700806, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -2.6831367015838623, "logits/rejected": -2.6130385398864746, "logps/chosen": -243.5470428466797, "logps/rejected": -231.25369262695312, "loss": 0.5619, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7277100086212158, "rewards/margins": 0.7260710597038269, "rewards/rejected": -1.4537811279296875, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -2.79364275932312, "logits/rejected": -2.704852342605591, "logps/chosen": -284.8489685058594, "logps/rejected": -241.8092041015625, "loss": 0.4803, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4155152440071106, "rewards/margins": 1.2566057443618774, "rewards/rejected": -1.6721210479736328, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -2.7232635021209717, "logits/rejected": -2.6678431034088135, "logps/chosen": -290.93505859375, "logps/rejected": -260.04241943359375, "loss": 0.4719, "rewards/accuracies": 0.75, "rewards/chosen": -0.3851390480995178, "rewards/margins": 1.2824820280075073, "rewards/rejected": -1.6676212549209595, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -2.7520861625671387, "logits/rejected": -2.6794071197509766, "logps/chosen": -318.5746765136719, "logps/rejected": -269.6056823730469, "loss": 0.5282, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6232264637947083, "rewards/margins": 0.9759466052055359, "rewards/rejected": -1.5991729497909546, "step": 400 }, { "epoch": 0.41, "eval_logits/chosen": -2.792391061782837, "eval_logits/rejected": -2.702341079711914, "eval_logps/chosen": -280.6413269042969, "eval_logps/rejected": -249.08758544921875, "eval_loss": 0.4877680838108063, "eval_rewards/accuracies": 0.7839999794960022, "eval_rewards/chosen": -0.6794139742851257, "eval_rewards/margins": 1.262575387954712, "eval_rewards/rejected": -1.9419893026351929, "eval_runtime": 327.8949, "eval_samples_per_second": 6.1, "eval_steps_per_second": 0.381, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -2.709923028945923, "logits/rejected": -2.662933588027954, "logps/chosen": -258.0174560546875, "logps/rejected": -243.55264282226562, "loss": 0.506, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7414777278900146, "rewards/margins": 1.040825605392456, "rewards/rejected": -1.7823032140731812, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -2.770792245864868, "logits/rejected": -2.7004318237304688, "logps/chosen": -301.0741271972656, "logps/rejected": -260.6761779785156, "loss": 0.5656, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5612724423408508, "rewards/margins": 0.8177105188369751, "rewards/rejected": -1.3789829015731812, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -2.6355745792388916, "logits/rejected": -2.5235517024993896, "logps/chosen": -259.4550476074219, "logps/rejected": -241.081298828125, "loss": 0.5152, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.33019572496414185, "rewards/margins": 1.2212638854980469, "rewards/rejected": -1.5514596700668335, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -2.6858983039855957, "logits/rejected": -2.606168031692505, "logps/chosen": -286.8215026855469, "logps/rejected": -261.89642333984375, "loss": 0.4437, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27932292222976685, "rewards/margins": 1.2575178146362305, "rewards/rejected": -1.536840796470642, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -2.6906371116638184, "logits/rejected": -2.62721586227417, "logps/chosen": -282.0963439941406, "logps/rejected": -252.3018035888672, "loss": 0.4949, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.45128607749938965, "rewards/margins": 1.2710622549057007, "rewards/rejected": -1.7223484516143799, "step": 450 }, { "epoch": 0.47, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -2.6066861152648926, "logits/rejected": -2.541717767715454, "logps/chosen": -287.3714904785156, "logps/rejected": -264.9255065917969, "loss": 0.4911, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.575259804725647, "rewards/margins": 1.08199143409729, "rewards/rejected": -1.6572513580322266, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -2.634578227996826, "logits/rejected": -2.5506088733673096, "logps/chosen": -291.7644348144531, "logps/rejected": -266.2176513671875, "loss": 0.4912, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5489755868911743, "rewards/margins": 1.124472975730896, "rewards/rejected": -1.6734485626220703, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -2.7003040313720703, "logits/rejected": -2.5828516483306885, "logps/chosen": -315.20574951171875, "logps/rejected": -249.669677734375, "loss": 0.4455, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2965030074119568, "rewards/margins": 1.5941083431243896, "rewards/rejected": -1.8906112909317017, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -2.6843831539154053, "logits/rejected": -2.560138702392578, "logps/chosen": -286.26092529296875, "logps/rejected": -245.45443725585938, "loss": 0.5024, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.48061037063598633, "rewards/margins": 1.3923174142837524, "rewards/rejected": -1.8729279041290283, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -2.7150895595550537, "logits/rejected": -2.6805529594421387, "logps/chosen": -259.2522888183594, "logps/rejected": -249.19253540039062, "loss": 0.5179, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.38437995314598083, "rewards/margins": 0.9828270077705383, "rewards/rejected": -1.3672068119049072, "step": 500 }, { "epoch": 0.52, "eval_logits/chosen": -2.7630860805511475, "eval_logits/rejected": -2.677306652069092, "eval_logps/chosen": -276.4918212890625, "eval_logps/rejected": -244.1531524658203, "eval_loss": 0.4804608225822449, "eval_rewards/accuracies": 0.7760000228881836, "eval_rewards/chosen": -0.26446640491485596, "eval_rewards/margins": 1.1840814352035522, "eval_rewards/rejected": -1.4485478401184082, "eval_runtime": 328.3333, "eval_samples_per_second": 6.091, "eval_steps_per_second": 0.381, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -2.771284580230713, "logits/rejected": -2.660457134246826, "logps/chosen": -299.34197998046875, "logps/rejected": -261.97998046875, "loss": 0.4769, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.30852824449539185, "rewards/margins": 1.3942599296569824, "rewards/rejected": -1.7027881145477295, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -2.7847771644592285, "logits/rejected": -2.7074437141418457, "logps/chosen": -247.3426513671875, "logps/rejected": -230.1765899658203, "loss": 0.457, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6353321075439453, "rewards/margins": 1.1674182415008545, "rewards/rejected": -1.8027503490447998, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -2.6685898303985596, "logits/rejected": -2.611987590789795, "logps/chosen": -291.04827880859375, "logps/rejected": -270.1742248535156, "loss": 0.4972, "rewards/accuracies": 0.71875, "rewards/chosen": -0.6013702154159546, "rewards/margins": 1.2920764684677124, "rewards/rejected": -1.893446922302246, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -2.691497802734375, "logits/rejected": -2.620311737060547, "logps/chosen": -271.4696044921875, "logps/rejected": -264.66278076171875, "loss": 0.5215, "rewards/accuracies": 0.75, "rewards/chosen": -0.5857911109924316, "rewards/margins": 1.2745181322097778, "rewards/rejected": -1.860309362411499, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -2.656611919403076, "logits/rejected": -2.6252033710479736, "logps/chosen": -273.7254943847656, "logps/rejected": -247.2091827392578, "loss": 0.4766, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7783125042915344, "rewards/margins": 0.9800532460212708, "rewards/rejected": -1.7583658695220947, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -2.7592997550964355, "logits/rejected": -2.657989978790283, "logps/chosen": -281.6728820800781, "logps/rejected": -246.2255859375, "loss": 0.4732, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4413899779319763, "rewards/margins": 1.3485950231552124, "rewards/rejected": -1.7899850606918335, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -2.7648417949676514, "logits/rejected": -2.6974472999572754, "logps/chosen": -249.6786346435547, "logps/rejected": -236.48934936523438, "loss": 0.4813, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.43044313788414, "rewards/margins": 1.4435796737670898, "rewards/rejected": -1.8740227222442627, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -2.756700038909912, "logits/rejected": -2.6791555881500244, "logps/chosen": -291.4740295410156, "logps/rejected": -262.89801025390625, "loss": 0.4691, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6896177530288696, "rewards/margins": 1.4548016786575317, "rewards/rejected": -2.1444194316864014, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -2.6673600673675537, "logits/rejected": -2.6058640480041504, "logps/chosen": -262.8567810058594, "logps/rejected": -247.41561889648438, "loss": 0.509, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.3871522545814514, "rewards/margins": 1.3774772882461548, "rewards/rejected": -1.764629602432251, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -2.745607376098633, "logits/rejected": -2.6397905349731445, "logps/chosen": -289.14971923828125, "logps/rejected": -248.13156127929688, "loss": 0.4705, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.5186043977737427, "rewards/margins": 1.4924787282943726, "rewards/rejected": -2.0110831260681152, "step": 600 }, { "epoch": 0.62, "eval_logits/chosen": -2.783844232559204, "eval_logits/rejected": -2.7009334564208984, "eval_logps/chosen": -276.8629150390625, "eval_logps/rejected": -245.43368530273438, "eval_loss": 0.47145330905914307, "eval_rewards/accuracies": 0.7559999823570251, "eval_rewards/chosen": -0.30157405138015747, "eval_rewards/margins": 1.2750270366668701, "eval_rewards/rejected": -1.5766010284423828, "eval_runtime": 329.209, "eval_samples_per_second": 6.075, "eval_steps_per_second": 0.38, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -2.776756525039673, "logits/rejected": -2.627607822418213, "logps/chosen": -299.1159362792969, "logps/rejected": -242.3809051513672, "loss": 0.4517, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.31977617740631104, "rewards/margins": 1.6483633518218994, "rewards/rejected": -1.968139410018921, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -2.73073148727417, "logits/rejected": -2.6580116748809814, "logps/chosen": -263.01519775390625, "logps/rejected": -232.265625, "loss": 0.4977, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4189836084842682, "rewards/margins": 1.2726702690124512, "rewards/rejected": -1.691653847694397, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -2.7166030406951904, "logits/rejected": -2.6070265769958496, "logps/chosen": -264.41058349609375, "logps/rejected": -236.72128295898438, "loss": 0.4824, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.27821844816207886, "rewards/margins": 1.1915299892425537, "rewards/rejected": -1.4697484970092773, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -2.611860513687134, "logits/rejected": -2.5530788898468018, "logps/chosen": -286.221923828125, "logps/rejected": -246.6705780029297, "loss": 0.4779, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.16525471210479736, "rewards/margins": 1.415501594543457, "rewards/rejected": -1.5807561874389648, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -2.626971960067749, "logits/rejected": -2.5745906829833984, "logps/chosen": -255.57138061523438, "logps/rejected": -252.6260528564453, "loss": 0.5389, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09872700273990631, "rewards/margins": 1.0981225967407227, "rewards/rejected": -1.1968495845794678, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -2.596104145050049, "logits/rejected": -2.5283610820770264, "logps/chosen": -255.2698516845703, "logps/rejected": -223.36605834960938, "loss": 0.5229, "rewards/accuracies": 0.71875, "rewards/chosen": -0.45211920142173767, "rewards/margins": 0.9975794553756714, "rewards/rejected": -1.4496986865997314, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -2.6542000770568848, "logits/rejected": -2.574998378753662, "logps/chosen": -261.0472717285156, "logps/rejected": -231.40328979492188, "loss": 0.5082, "rewards/accuracies": 0.6875, "rewards/chosen": -0.492902934551239, "rewards/margins": 1.1550512313842773, "rewards/rejected": -1.647953987121582, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -2.6648573875427246, "logits/rejected": -2.5788636207580566, "logps/chosen": -303.67791748046875, "logps/rejected": -276.10723876953125, "loss": 0.4633, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.35903650522232056, "rewards/margins": 1.5083674192428589, "rewards/rejected": -1.8674037456512451, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -2.6442172527313232, "logits/rejected": -2.5383548736572266, "logps/chosen": -322.7700500488281, "logps/rejected": -257.04278564453125, "loss": 0.503, "rewards/accuracies": 0.75, "rewards/chosen": -0.48487988114356995, "rewards/margins": 1.269715666770935, "rewards/rejected": -1.7545955181121826, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -2.6078996658325195, "logits/rejected": -2.502384901046753, "logps/chosen": -276.53558349609375, "logps/rejected": -268.16131591796875, "loss": 0.5038, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.3312338888645172, "rewards/margins": 1.3475643396377563, "rewards/rejected": -1.6787983179092407, "step": 700 }, { "epoch": 0.72, "eval_logits/chosen": -2.6268980503082275, "eval_logits/rejected": -2.5408694744110107, "eval_logps/chosen": -276.9665832519531, "eval_logps/rejected": -245.39862060546875, "eval_loss": 0.47904127836227417, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -0.31194165349006653, "eval_rewards/margins": 1.261155366897583, "eval_rewards/rejected": -1.5730971097946167, "eval_runtime": 329.3542, "eval_samples_per_second": 6.072, "eval_steps_per_second": 0.38, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -2.565584182739258, "logits/rejected": -2.4798641204833984, "logps/chosen": -289.7725830078125, "logps/rejected": -241.417724609375, "loss": 0.4679, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.13149698078632355, "rewards/margins": 1.4788200855255127, "rewards/rejected": -1.6103169918060303, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -2.5125203132629395, "logits/rejected": -2.440331220626831, "logps/chosen": -243.36306762695312, "logps/rejected": -215.3681640625, "loss": 0.4894, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.39927834272384644, "rewards/margins": 1.3379909992218018, "rewards/rejected": -1.737269401550293, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -2.523902177810669, "logits/rejected": -2.4464023113250732, "logps/chosen": -322.8412170410156, "logps/rejected": -260.85198974609375, "loss": 0.5377, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4920814633369446, "rewards/margins": 1.0238711833953857, "rewards/rejected": -1.515952467918396, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -2.4797842502593994, "logits/rejected": -2.4306349754333496, "logps/chosen": -260.5240173339844, "logps/rejected": -235.9158172607422, "loss": 0.4844, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26274144649505615, "rewards/margins": 1.1417067050933838, "rewards/rejected": -1.40444815158844, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -2.5010387897491455, "logits/rejected": -2.4390175342559814, "logps/chosen": -267.5348815917969, "logps/rejected": -265.01861572265625, "loss": 0.5134, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.3145067095756531, "rewards/margins": 1.411447286605835, "rewards/rejected": -1.7259540557861328, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -2.502624988555908, "logits/rejected": -2.4160873889923096, "logps/chosen": -248.45449829101562, "logps/rejected": -230.887939453125, "loss": 0.5244, "rewards/accuracies": 0.71875, "rewards/chosen": -0.5923348665237427, "rewards/margins": 1.0817146301269531, "rewards/rejected": -1.6740491390228271, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -2.600817918777466, "logits/rejected": -2.5673515796661377, "logps/chosen": -283.1528015136719, "logps/rejected": -261.5489501953125, "loss": 0.5599, "rewards/accuracies": 0.75, "rewards/chosen": -0.4175918698310852, "rewards/margins": 1.0563184022903442, "rewards/rejected": -1.4739103317260742, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -2.5576181411743164, "logits/rejected": -2.5258378982543945, "logps/chosen": -274.6518859863281, "logps/rejected": -244.93185424804688, "loss": 0.4387, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4217241406440735, "rewards/margins": 1.273429036140442, "rewards/rejected": -1.695152997970581, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -2.5924856662750244, "logits/rejected": -2.5202670097351074, "logps/chosen": -269.2760009765625, "logps/rejected": -251.0695037841797, "loss": 0.4782, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.3948896527290344, "rewards/margins": 1.6062942743301392, "rewards/rejected": -2.0011839866638184, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -2.500338315963745, "logits/rejected": -2.4486918449401855, "logps/chosen": -275.6775817871094, "logps/rejected": -254.4421844482422, "loss": 0.4418, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3929779827594757, "rewards/margins": 1.565860390663147, "rewards/rejected": -1.9588382244110107, "step": 800 }, { "epoch": 0.83, "eval_logits/chosen": -2.5635573863983154, "eval_logits/rejected": -2.4833929538726807, "eval_logps/chosen": -278.41131591796875, "eval_logps/rejected": -249.84423828125, "eval_loss": 0.46652939915657043, "eval_rewards/accuracies": 0.7799999713897705, "eval_rewards/chosen": -0.45641571283340454, "eval_rewards/margins": 1.5612393617630005, "eval_rewards/rejected": -2.01765513420105, "eval_runtime": 328.8068, "eval_samples_per_second": 6.083, "eval_steps_per_second": 0.38, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -2.521207332611084, "logits/rejected": -2.4697537422180176, "logps/chosen": -286.6387634277344, "logps/rejected": -234.17514038085938, "loss": 0.5215, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.6284313201904297, "rewards/margins": 1.2665073871612549, "rewards/rejected": -1.8949388265609741, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -2.600959539413452, "logits/rejected": -2.547930955886841, "logps/chosen": -279.1971435546875, "logps/rejected": -271.48529052734375, "loss": 0.5072, "rewards/accuracies": 0.75, "rewards/chosen": -0.5025497674942017, "rewards/margins": 1.4657728672027588, "rewards/rejected": -1.968322515487671, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -2.5383434295654297, "logits/rejected": -2.4821691513061523, "logps/chosen": -276.9917907714844, "logps/rejected": -230.35025024414062, "loss": 0.4449, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5041875243186951, "rewards/margins": 1.4996968507766724, "rewards/rejected": -2.0038845539093018, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -2.568638563156128, "logits/rejected": -2.5122299194335938, "logps/chosen": -234.30862426757812, "logps/rejected": -238.3922119140625, "loss": 0.4666, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5311821103096008, "rewards/margins": 1.3636810779571533, "rewards/rejected": -1.894863486289978, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -2.550352096557617, "logits/rejected": -2.4809679985046387, "logps/chosen": -277.41363525390625, "logps/rejected": -261.5442810058594, "loss": 0.4991, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.4109998345375061, "rewards/margins": 1.6612545251846313, "rewards/rejected": -2.072254180908203, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -2.5133216381073, "logits/rejected": -2.4779887199401855, "logps/chosen": -280.0914001464844, "logps/rejected": -264.759765625, "loss": 0.4767, "rewards/accuracies": 0.78125, "rewards/chosen": -0.3346596360206604, "rewards/margins": 1.4379736185073853, "rewards/rejected": -1.7726333141326904, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -2.558295488357544, "logits/rejected": -2.4871633052825928, "logps/chosen": -284.81866455078125, "logps/rejected": -264.2315368652344, "loss": 0.5071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.68181312084198, "rewards/margins": 1.149623155593872, "rewards/rejected": -1.8314363956451416, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -2.5324788093566895, "logits/rejected": -2.458425998687744, "logps/chosen": -279.7908935546875, "logps/rejected": -257.31512451171875, "loss": 0.4992, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.35828351974487305, "rewards/margins": 1.2599681615829468, "rewards/rejected": -1.6182515621185303, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -2.4853129386901855, "logits/rejected": -2.4347476959228516, "logps/chosen": -281.7766418457031, "logps/rejected": -250.4887237548828, "loss": 0.4881, "rewards/accuracies": 0.75, "rewards/chosen": -0.3670748770236969, "rewards/margins": 1.4128227233886719, "rewards/rejected": -1.7798974514007568, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -2.498248338699341, "logits/rejected": -2.4675116539001465, "logps/chosen": -293.01519775390625, "logps/rejected": -260.99517822265625, "loss": 0.5155, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.6357588768005371, "rewards/margins": 1.182974100112915, "rewards/rejected": -1.8187328577041626, "step": 900 }, { "epoch": 0.93, "eval_logits/chosen": -2.592735528945923, "eval_logits/rejected": -2.511793851852417, "eval_logps/chosen": -277.5622253417969, "eval_logps/rejected": -246.7467803955078, "eval_loss": 0.47698700428009033, "eval_rewards/accuracies": 0.7739999890327454, "eval_rewards/chosen": -0.3715021014213562, "eval_rewards/margins": 1.3364099264144897, "eval_rewards/rejected": -1.7079118490219116, "eval_runtime": 329.3477, "eval_samples_per_second": 6.073, "eval_steps_per_second": 0.38, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -2.5742714405059814, "logits/rejected": -2.529768466949463, "logps/chosen": -299.46148681640625, "logps/rejected": -274.9568176269531, "loss": 0.5162, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5742639303207397, "rewards/margins": 1.0656424760818481, "rewards/rejected": -1.6399062871932983, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -2.591820001602173, "logits/rejected": -2.5311317443847656, "logps/chosen": -250.81991577148438, "logps/rejected": -271.74761962890625, "loss": 0.4654, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5318310260772705, "rewards/margins": 1.5595569610595703, "rewards/rejected": -2.09138822555542, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -2.615499496459961, "logits/rejected": -2.5490453243255615, "logps/chosen": -298.67694091796875, "logps/rejected": -250.24130249023438, "loss": 0.4747, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.4868291914463043, "rewards/margins": 1.7270475625991821, "rewards/rejected": -2.213876247406006, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -2.6219589710235596, "logits/rejected": -2.500911235809326, "logps/chosen": -284.215576171875, "logps/rejected": -231.4106903076172, "loss": 0.4837, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.46450090408325195, "rewards/margins": 1.5634416341781616, "rewards/rejected": -2.027942657470703, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -2.5877745151519775, "logits/rejected": -2.5523974895477295, "logps/chosen": -296.5566101074219, "logps/rejected": -282.413818359375, "loss": 0.4916, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.7169415354728699, "rewards/margins": 1.3798969984054565, "rewards/rejected": -2.0968384742736816, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -2.5522520542144775, "logits/rejected": -2.5006377696990967, "logps/chosen": -271.15631103515625, "logps/rejected": -253.3765411376953, "loss": 0.4431, "rewards/accuracies": 0.75, "rewards/chosen": -0.6804525852203369, "rewards/margins": 1.3519657850265503, "rewards/rejected": -2.0324184894561768, "step": 960 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -2.5357367992401123, "logits/rejected": -2.434713363647461, "logps/chosen": -301.4870300292969, "logps/rejected": -240.8806915283203, "loss": 0.5095, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.6089678406715393, "rewards/margins": 1.4054759740829468, "rewards/rejected": -2.014443874359131, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -2.637885570526123, "logits/rejected": -2.5964674949645996, "logps/chosen": -285.35125732421875, "logps/rejected": -281.14105224609375, "loss": 0.3956, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23545095324516296, "rewards/margins": 2.0617527961730957, "rewards/rejected": -2.29720401763916, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -2.633040428161621, "logits/rejected": -2.5896546840667725, "logps/chosen": -291.3595886230469, "logps/rejected": -263.8270263671875, "loss": 0.3593, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06208040565252304, "rewards/margins": 2.071259021759033, "rewards/rejected": -2.1333394050598145, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -2.621001958847046, "logits/rejected": -2.5748400688171387, "logps/chosen": -238.26870727539062, "logps/rejected": -245.1639404296875, "loss": 0.3463, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22620248794555664, "rewards/margins": 1.9476579427719116, "rewards/rejected": -2.1738600730895996, "step": 1000 }, { "epoch": 1.03, "eval_logits/chosen": -2.7082529067993164, "eval_logits/rejected": -2.6281511783599854, "eval_logps/chosen": -279.15203857421875, "eval_logps/rejected": -247.93055725097656, "eval_loss": 0.47547972202301025, "eval_rewards/accuracies": 0.7680000066757202, "eval_rewards/chosen": -0.5304849743843079, "eval_rewards/margins": 1.2958035469055176, "eval_rewards/rejected": -1.8262888193130493, "eval_runtime": 328.7321, "eval_samples_per_second": 6.084, "eval_steps_per_second": 0.38, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -2.6334519386291504, "logits/rejected": -2.5576975345611572, "logps/chosen": -237.9059295654297, "logps/rejected": -202.5619659423828, "loss": 0.2682, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.20000645518302917, "rewards/margins": 1.985250473022461, "rewards/rejected": -2.1852567195892334, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -2.6050827503204346, "logits/rejected": -2.5590696334838867, "logps/chosen": -263.57232666015625, "logps/rejected": -226.03652954101562, "loss": 0.2637, "rewards/accuracies": 0.918749988079071, "rewards/chosen": 0.14606496691703796, "rewards/margins": 2.4412872791290283, "rewards/rejected": -2.295222520828247, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -2.592588424682617, "logits/rejected": -2.560858726501465, "logps/chosen": -251.0801239013672, "logps/rejected": -268.08868408203125, "loss": 0.2414, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16298413276672363, "rewards/margins": 2.507159948348999, "rewards/rejected": -2.3441758155822754, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -2.623647451400757, "logits/rejected": -2.5220494270324707, "logps/chosen": -319.46319580078125, "logps/rejected": -263.6154479980469, "loss": 0.2081, "rewards/accuracies": 0.918749988079071, "rewards/chosen": 0.14128056168556213, "rewards/margins": 2.827650785446167, "rewards/rejected": -2.6863701343536377, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -2.5959651470184326, "logits/rejected": -2.5497353076934814, "logps/chosen": -283.4841613769531, "logps/rejected": -247.54867553710938, "loss": 0.1888, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1350892335176468, "rewards/margins": 2.7649712562561035, "rewards/rejected": -2.6298820972442627, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -2.559783697128296, "logits/rejected": -2.5014069080352783, "logps/chosen": -270.575927734375, "logps/rejected": -260.4879455566406, "loss": 0.1804, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.1834518164396286, "rewards/margins": 3.1269893646240234, "rewards/rejected": -2.943537712097168, "step": 1060 }, { "epoch": 1.1, "learning_rate": 3.50937619594336e-07, "logits/chosen": -2.627401113510132, "logits/rejected": -2.5442299842834473, "logps/chosen": -265.7518310546875, "logps/rejected": -277.06048583984375, "loss": 0.1877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1024879664182663, "rewards/margins": 2.9685001373291016, "rewards/rejected": -2.8660120964050293, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -2.631279706954956, "logits/rejected": -2.5586652755737305, "logps/chosen": -291.9482116699219, "logps/rejected": -279.103759765625, "loss": 0.1456, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08294292539358139, "rewards/margins": 3.53105092048645, "rewards/rejected": -3.448107957839966, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -2.5913405418395996, "logits/rejected": -2.5348830223083496, "logps/chosen": -256.9413757324219, "logps/rejected": -251.29953002929688, "loss": 0.1252, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.05543314665555954, "rewards/margins": 3.7001495361328125, "rewards/rejected": -3.644716262817383, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -2.631477117538452, "logits/rejected": -2.6130545139312744, "logps/chosen": -256.7820739746094, "logps/rejected": -247.29861450195312, "loss": 0.1266, "rewards/accuracies": 0.96875, "rewards/chosen": 0.08383353799581528, "rewards/margins": 3.7477478981018066, "rewards/rejected": -3.663914203643799, "step": 1100 }, { "epoch": 1.14, "eval_logits/chosen": -2.6430113315582275, "eval_logits/rejected": -2.5584213733673096, "eval_logps/chosen": -283.978271484375, "eval_logps/rejected": -258.3182067871094, "eval_loss": 0.49244019389152527, "eval_rewards/accuracies": 0.7739999890327454, "eval_rewards/chosen": -1.0131123065948486, "eval_rewards/margins": 1.8519433736801147, "eval_rewards/rejected": -2.8650553226470947, "eval_runtime": 328.4358, "eval_samples_per_second": 6.089, "eval_steps_per_second": 0.381, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -2.6089444160461426, "logits/rejected": -2.569436550140381, "logps/chosen": -277.25665283203125, "logps/rejected": -279.2236633300781, "loss": 0.1245, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09174539893865585, "rewards/margins": 3.8159255981445312, "rewards/rejected": -3.9076714515686035, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -2.6246414184570312, "logits/rejected": -2.5093507766723633, "logps/chosen": -296.4078674316406, "logps/rejected": -253.695068359375, "loss": 0.1108, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.04911733418703079, "rewards/margins": 3.8723056316375732, "rewards/rejected": -3.823188304901123, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -2.5909955501556396, "logits/rejected": -2.5098001956939697, "logps/chosen": -271.1025390625, "logps/rejected": -267.0798645019531, "loss": 0.1042, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.018337160348892212, "rewards/margins": 4.38796329498291, "rewards/rejected": -4.369626522064209, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -2.603445291519165, "logits/rejected": -2.5317625999450684, "logps/chosen": -267.75177001953125, "logps/rejected": -276.5166931152344, "loss": 0.0957, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.007721236441284418, "rewards/margins": 4.616903781890869, "rewards/rejected": -4.624624252319336, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -2.5505504608154297, "logits/rejected": -2.4800028800964355, "logps/chosen": -244.16177368164062, "logps/rejected": -260.59527587890625, "loss": 0.0964, "rewards/accuracies": 0.96875, "rewards/chosen": 0.06246214359998703, "rewards/margins": 4.644533634185791, "rewards/rejected": -4.582070827484131, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -2.5717084407806396, "logits/rejected": -2.48624587059021, "logps/chosen": -284.52362060546875, "logps/rejected": -286.84625244140625, "loss": 0.0821, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.03565800562500954, "rewards/margins": 4.872807502746582, "rewards/rejected": -4.837149620056152, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -2.61575984954834, "logits/rejected": -2.5330493450164795, "logps/chosen": -302.53533935546875, "logps/rejected": -303.67889404296875, "loss": 0.0761, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.09367243200540543, "rewards/margins": 5.462395668029785, "rewards/rejected": -5.5560688972473145, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -2.6104063987731934, "logits/rejected": -2.5553605556488037, "logps/chosen": -269.8067932128906, "logps/rejected": -275.9356994628906, "loss": 0.0647, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 0.17088812589645386, "rewards/margins": 5.2629804611206055, "rewards/rejected": -5.092092990875244, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -2.594054698944092, "logits/rejected": -2.5422348976135254, "logps/chosen": -262.38433837890625, "logps/rejected": -284.35272216796875, "loss": 0.0645, "rewards/accuracies": 0.96875, "rewards/chosen": -0.2543705999851227, "rewards/margins": 5.296311378479004, "rewards/rejected": -5.550681114196777, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -2.606248140335083, "logits/rejected": -2.5383098125457764, "logps/chosen": -306.80389404296875, "logps/rejected": -289.07110595703125, "loss": 0.0751, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.4288966655731201, "rewards/margins": 5.881345272064209, "rewards/rejected": -5.45244836807251, "step": 1200 }, { "epoch": 1.24, "eval_logits/chosen": -2.6450486183166504, "eval_logits/rejected": -2.5574116706848145, "eval_logps/chosen": -288.3549499511719, "eval_logps/rejected": -266.3139343261719, "eval_loss": 0.5208475589752197, "eval_rewards/accuracies": 0.7760000228881836, "eval_rewards/chosen": -1.4507769346237183, "eval_rewards/margins": 2.213848352432251, "eval_rewards/rejected": -3.6646251678466797, "eval_runtime": 328.5316, "eval_samples_per_second": 6.088, "eval_steps_per_second": 0.38, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -2.569955348968506, "logits/rejected": -2.5366978645324707, "logps/chosen": -271.14764404296875, "logps/rejected": -311.7731628417969, "loss": 0.0516, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.0714511051774025, "rewards/margins": 6.200972557067871, "rewards/rejected": -6.129521369934082, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -2.552734851837158, "logits/rejected": -2.444171905517578, "logps/chosen": -288.9889221191406, "logps/rejected": -297.237548828125, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": 0.27881333231925964, "rewards/margins": 6.696805000305176, "rewards/rejected": -6.417990684509277, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -2.5759170055389404, "logits/rejected": -2.5034148693084717, "logps/chosen": -285.6966857910156, "logps/rejected": -329.5125427246094, "loss": 0.0629, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.15722158551216125, "rewards/margins": 6.698854923248291, "rewards/rejected": -6.541632652282715, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -2.555694818496704, "logits/rejected": -2.485347270965576, "logps/chosen": -288.07611083984375, "logps/rejected": -292.7718505859375, "loss": 0.0525, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.205997496843338, "rewards/margins": 6.743833065032959, "rewards/rejected": -6.53783655166626, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -2.576767683029175, "logits/rejected": -2.534345865249634, "logps/chosen": -238.9010467529297, "logps/rejected": -278.7056579589844, "loss": 0.0425, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": 0.13358671963214874, "rewards/margins": 6.311589241027832, "rewards/rejected": -6.178002834320068, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -2.576357126235962, "logits/rejected": -2.497936248779297, "logps/chosen": -266.5452880859375, "logps/rejected": -300.4448547363281, "loss": 0.0417, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3052632808685303, "rewards/margins": 7.074311256408691, "rewards/rejected": -6.769047737121582, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -2.5541863441467285, "logits/rejected": -2.496516704559326, "logps/chosen": -262.9737854003906, "logps/rejected": -312.977783203125, "loss": 0.047, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": 0.3359752893447876, "rewards/margins": 7.000231742858887, "rewards/rejected": -6.664257049560547, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -2.4729394912719727, "logits/rejected": -2.4231820106506348, "logps/chosen": -249.3036651611328, "logps/rejected": -301.2051696777344, "loss": 0.0501, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4446950852870941, "rewards/margins": 6.993998050689697, "rewards/rejected": -6.549304008483887, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -2.563807964324951, "logits/rejected": -2.496666431427002, "logps/chosen": -284.2848205566406, "logps/rejected": -292.50872802734375, "loss": 0.0458, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 0.3111470341682434, "rewards/margins": 7.219347953796387, "rewards/rejected": -6.908200740814209, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -2.505225658416748, "logits/rejected": -2.4466845989227295, "logps/chosen": -245.6407470703125, "logps/rejected": -272.4444274902344, "loss": 0.0306, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.265463650226593, "rewards/margins": 6.57799768447876, "rewards/rejected": -6.843461513519287, "step": 1300 }, { "epoch": 1.34, "eval_logits/chosen": -2.5864531993865967, "eval_logits/rejected": -2.4956560134887695, "eval_logps/chosen": -295.3101806640625, "eval_logps/rejected": -277.1172180175781, "eval_loss": 0.577923059463501, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -2.1463019847869873, "eval_rewards/margins": 2.5986533164978027, "eval_rewards/rejected": -4.744955539703369, "eval_runtime": 327.6113, "eval_samples_per_second": 6.105, "eval_steps_per_second": 0.382, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -2.530522584915161, "logits/rejected": -2.4422647953033447, "logps/chosen": -260.3827819824219, "logps/rejected": -274.53424072265625, "loss": 0.0479, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.38277196884155273, "rewards/margins": 6.145764350891113, "rewards/rejected": -6.52853536605835, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -2.530467987060547, "logits/rejected": -2.457545757293701, "logps/chosen": -280.2557678222656, "logps/rejected": -312.9700622558594, "loss": 0.0435, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.0774858146905899, "rewards/margins": 7.1998748779296875, "rewards/rejected": -7.122389316558838, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -2.5862784385681152, "logits/rejected": -2.4979710578918457, "logps/chosen": -265.94195556640625, "logps/rejected": -322.9318542480469, "loss": 0.0529, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.03257175534963608, "rewards/margins": 7.4844207763671875, "rewards/rejected": -7.451849460601807, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -2.529265880584717, "logits/rejected": -2.472142457962036, "logps/chosen": -241.907958984375, "logps/rejected": -295.9644775390625, "loss": 0.0453, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.2552769184112549, "rewards/margins": 7.376172065734863, "rewards/rejected": -7.631448268890381, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -2.6861939430236816, "logits/rejected": -2.5757153034210205, "logps/chosen": -304.3006286621094, "logps/rejected": -301.52545166015625, "loss": 0.0396, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 0.03746197372674942, "rewards/margins": 7.219727993011475, "rewards/rejected": -7.1822662353515625, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -2.54135799407959, "logits/rejected": -2.515650749206543, "logps/chosen": -278.438232421875, "logps/rejected": -317.2010498046875, "loss": 0.0398, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.3339845538139343, "rewards/margins": 7.23375940322876, "rewards/rejected": -7.56774377822876, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -2.599730968475342, "logits/rejected": -2.531221628189087, "logps/chosen": -311.6480712890625, "logps/rejected": -324.45635986328125, "loss": 0.0447, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.26491448283195496, "rewards/margins": 7.551673889160156, "rewards/rejected": -7.816588401794434, "step": 1370 }, { "epoch": 1.42, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -2.5754849910736084, "logits/rejected": -2.5394327640533447, "logps/chosen": -259.94171142578125, "logps/rejected": -315.7677001953125, "loss": 0.0517, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3456391394138336, "rewards/margins": 6.892177581787109, "rewards/rejected": -7.23781681060791, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -2.6177515983581543, "logits/rejected": -2.5629687309265137, "logps/chosen": -286.0957336425781, "logps/rejected": -307.2629699707031, "loss": 0.0425, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2400694638490677, "rewards/margins": 6.7607903480529785, "rewards/rejected": -6.520721435546875, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -2.5180513858795166, "logits/rejected": -2.4203319549560547, "logps/chosen": -262.3363952636719, "logps/rejected": -293.3941955566406, "loss": 0.031, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.30498212575912476, "rewards/margins": 6.864785671234131, "rewards/rejected": -7.1697678565979, "step": 1400 }, { "epoch": 1.45, "eval_logits/chosen": -2.6050970554351807, "eval_logits/rejected": -2.5157244205474854, "eval_logps/chosen": -300.5773620605469, "eval_logps/rejected": -282.7791748046875, "eval_loss": 0.5992786288261414, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -2.673020124435425, "eval_rewards/margins": 2.638129711151123, "eval_rewards/rejected": -5.311149597167969, "eval_runtime": 329.4856, "eval_samples_per_second": 6.07, "eval_steps_per_second": 0.379, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -2.6051602363586426, "logits/rejected": -2.492736339569092, "logps/chosen": -296.25274658203125, "logps/rejected": -316.0076599121094, "loss": 0.0344, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.7290128469467163, "rewards/margins": 6.831245422363281, "rewards/rejected": -7.560257911682129, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -2.5280001163482666, "logits/rejected": -2.476745367050171, "logps/chosen": -287.8592529296875, "logps/rejected": -325.2400207519531, "loss": 0.0638, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8896492123603821, "rewards/margins": 7.580406188964844, "rewards/rejected": -8.47005558013916, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -2.591068744659424, "logits/rejected": -2.5293643474578857, "logps/chosen": -284.336181640625, "logps/rejected": -330.9443359375, "loss": 0.0486, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8629849553108215, "rewards/margins": 6.952902317047119, "rewards/rejected": -7.815886497497559, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -2.597510814666748, "logits/rejected": -2.549468517303467, "logps/chosen": -293.65545654296875, "logps/rejected": -315.900146484375, "loss": 0.0487, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.5767217874526978, "rewards/margins": 7.4031982421875, "rewards/rejected": -7.97991943359375, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -2.6672866344451904, "logits/rejected": -2.5381336212158203, "logps/chosen": -321.498291015625, "logps/rejected": -318.640869140625, "loss": 0.0441, "rewards/accuracies": 0.96875, "rewards/chosen": -0.2594658434391022, "rewards/margins": 7.167167663574219, "rewards/rejected": -7.426634311676025, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -2.6042659282684326, "logits/rejected": -2.4887306690216064, "logps/chosen": -276.0786437988281, "logps/rejected": -292.72967529296875, "loss": 0.0589, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5728877186775208, "rewards/margins": 6.894124507904053, "rewards/rejected": -7.467011451721191, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -2.5835888385772705, "logits/rejected": -2.523806571960449, "logps/chosen": -274.3123474121094, "logps/rejected": -314.17633056640625, "loss": 0.0726, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.20469942688941956, "rewards/margins": 6.882570743560791, "rewards/rejected": -7.087271213531494, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -2.5750460624694824, "logits/rejected": -2.498121976852417, "logps/chosen": -270.62261962890625, "logps/rejected": -296.22247314453125, "loss": 0.0533, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.46651512384414673, "rewards/margins": 6.364560604095459, "rewards/rejected": -6.831076145172119, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -2.6253905296325684, "logits/rejected": -2.530066967010498, "logps/chosen": -249.2847442626953, "logps/rejected": -282.79571533203125, "loss": 0.0408, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.562870979309082, "rewards/margins": 6.45670223236084, "rewards/rejected": -7.019574165344238, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -2.5404858589172363, "logits/rejected": -2.4926059246063232, "logps/chosen": -304.41253662109375, "logps/rejected": -347.6630554199219, "loss": 0.0535, "rewards/accuracies": 0.96875, "rewards/chosen": -0.6575719714164734, "rewards/margins": 7.0210371017456055, "rewards/rejected": -7.678609371185303, "step": 1500 }, { "epoch": 1.55, "eval_logits/chosen": -2.652937650680542, "eval_logits/rejected": -2.5616180896759033, "eval_logps/chosen": -295.47467041015625, "eval_logps/rejected": -277.6109924316406, "eval_loss": 0.573060154914856, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -2.162747621536255, "eval_rewards/margins": 2.631584882736206, "eval_rewards/rejected": -4.794332981109619, "eval_runtime": 329.0517, "eval_samples_per_second": 6.078, "eval_steps_per_second": 0.38, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -2.6182427406311035, "logits/rejected": -2.5373194217681885, "logps/chosen": -272.0426940917969, "logps/rejected": -314.56256103515625, "loss": 0.0406, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.302041620016098, "rewards/margins": 6.977307319641113, "rewards/rejected": -7.279348850250244, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -2.5645289421081543, "logits/rejected": -2.5248210430145264, "logps/chosen": -267.35479736328125, "logps/rejected": -293.21136474609375, "loss": 0.0381, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3300275206565857, "rewards/margins": 6.987812042236328, "rewards/rejected": -7.317839622497559, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -2.588085889816284, "logits/rejected": -2.4837424755096436, "logps/chosen": -276.7251281738281, "logps/rejected": -307.32916259765625, "loss": 0.0313, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.410992294549942, "rewards/margins": 6.828526496887207, "rewards/rejected": -7.239518165588379, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -2.5293657779693604, "logits/rejected": -2.4737491607666016, "logps/chosen": -249.6378936767578, "logps/rejected": -282.2840881347656, "loss": 0.0485, "rewards/accuracies": 0.96875, "rewards/chosen": -1.0298715829849243, "rewards/margins": 6.555671691894531, "rewards/rejected": -7.58554220199585, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -2.5496673583984375, "logits/rejected": -2.4890804290771484, "logps/chosen": -300.34918212890625, "logps/rejected": -333.11688232421875, "loss": 0.0441, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0020015239715576, "rewards/margins": 6.947371006011963, "rewards/rejected": -7.949372291564941, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -2.464364528656006, "logits/rejected": -2.387753963470459, "logps/chosen": -264.74920654296875, "logps/rejected": -290.9281005859375, "loss": 0.0492, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6402736902236938, "rewards/margins": 6.474742889404297, "rewards/rejected": -7.115015983581543, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -2.5369412899017334, "logits/rejected": -2.425361156463623, "logps/chosen": -284.7231750488281, "logps/rejected": -296.4128112792969, "loss": 0.0491, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.9682533144950867, "rewards/margins": 6.315736770629883, "rewards/rejected": -7.28399133682251, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -2.563443660736084, "logits/rejected": -2.441769599914551, "logps/chosen": -313.6112365722656, "logps/rejected": -305.52239990234375, "loss": 0.0388, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.46426552534103394, "rewards/margins": 6.881335258483887, "rewards/rejected": -7.3456010818481445, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -2.5470073223114014, "logits/rejected": -2.4438252449035645, "logps/chosen": -251.0168914794922, "logps/rejected": -280.91961669921875, "loss": 0.0562, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.10129622370004654, "rewards/margins": 6.691439151763916, "rewards/rejected": -6.7927350997924805, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -2.610071897506714, "logits/rejected": -2.5157220363616943, "logps/chosen": -281.8477783203125, "logps/rejected": -294.8487243652344, "loss": 0.063, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2210036963224411, "rewards/margins": 6.306001663208008, "rewards/rejected": -6.527005195617676, "step": 1600 }, { "epoch": 1.65, "eval_logits/chosen": -2.5985054969787598, "eval_logits/rejected": -2.503840923309326, "eval_logps/chosen": -293.67022705078125, "eval_logps/rejected": -275.4324645996094, "eval_loss": 0.543339192867279, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -1.9823036193847656, "eval_rewards/margins": 2.594174861907959, "eval_rewards/rejected": -4.576478481292725, "eval_runtime": 329.1368, "eval_samples_per_second": 6.077, "eval_steps_per_second": 0.38, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -2.5374481678009033, "logits/rejected": -2.4631876945495605, "logps/chosen": -272.7527770996094, "logps/rejected": -301.7461242675781, "loss": 0.0515, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.12996195256710052, "rewards/margins": 7.1182541847229, "rewards/rejected": -7.248216152191162, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -2.5310726165771484, "logits/rejected": -2.470857620239258, "logps/chosen": -264.4376525878906, "logps/rejected": -309.92205810546875, "loss": 0.0466, "rewards/accuracies": 0.96875, "rewards/chosen": -0.09321962296962738, "rewards/margins": 6.667203426361084, "rewards/rejected": -6.760423183441162, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -2.4673683643341064, "logits/rejected": -2.4015889167785645, "logps/chosen": -245.9351348876953, "logps/rejected": -268.8101806640625, "loss": 0.054, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.7791624665260315, "rewards/margins": 6.342289924621582, "rewards/rejected": -7.121452331542969, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -2.560784101486206, "logits/rejected": -2.449129343032837, "logps/chosen": -273.04229736328125, "logps/rejected": -294.8717346191406, "loss": 0.0389, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.1826784312725067, "rewards/margins": 6.658658504486084, "rewards/rejected": -6.841336727142334, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -2.5664353370666504, "logits/rejected": -2.452826738357544, "logps/chosen": -303.6127624511719, "logps/rejected": -331.66033935546875, "loss": 0.0434, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.40071630477905273, "rewards/margins": 6.958899021148682, "rewards/rejected": -7.359615325927734, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -2.535295009613037, "logits/rejected": -2.417152166366577, "logps/chosen": -333.24072265625, "logps/rejected": -316.44830322265625, "loss": 0.0342, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7423610091209412, "rewards/margins": 7.322349548339844, "rewards/rejected": -8.06471061706543, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -2.564420700073242, "logits/rejected": -2.4815382957458496, "logps/chosen": -262.5311279296875, "logps/rejected": -331.90570068359375, "loss": 0.0324, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.6712821125984192, "rewards/margins": 7.437242031097412, "rewards/rejected": -8.108525276184082, "step": 1670 }, { "epoch": 1.73, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -2.515321969985962, "logits/rejected": -2.3945698738098145, "logps/chosen": -290.71563720703125, "logps/rejected": -294.52008056640625, "loss": 0.0412, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.4271801710128784, "rewards/margins": 6.557519435882568, "rewards/rejected": -7.984699249267578, "step": 1680 }, { "epoch": 1.74, "learning_rate": 2.323000382701875e-07, "logits/chosen": -2.523240566253662, "logits/rejected": -2.432344436645508, "logps/chosen": -275.192138671875, "logps/rejected": -287.0332336425781, "loss": 0.0567, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.1197443008422852, "rewards/margins": 6.0831122398376465, "rewards/rejected": -7.202856540679932, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -2.4755594730377197, "logits/rejected": -2.389221668243408, "logps/chosen": -303.61181640625, "logps/rejected": -317.91302490234375, "loss": 0.0423, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6061100959777832, "rewards/margins": 7.471484184265137, "rewards/rejected": -8.077593803405762, "step": 1700 }, { "epoch": 1.76, "eval_logits/chosen": -2.5654473304748535, "eval_logits/rejected": -2.4635632038116455, "eval_logps/chosen": -300.39990234375, "eval_logps/rejected": -283.8501892089844, "eval_loss": 0.5821015238761902, "eval_rewards/accuracies": 0.7540000081062317, "eval_rewards/chosen": -2.655275344848633, "eval_rewards/margins": 2.762974739074707, "eval_rewards/rejected": -5.41825008392334, "eval_runtime": 329.0804, "eval_samples_per_second": 6.078, "eval_steps_per_second": 0.38, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -2.4770474433898926, "logits/rejected": -2.4310386180877686, "logps/chosen": -263.2637939453125, "logps/rejected": -285.5542297363281, "loss": 0.0644, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.271353542804718, "rewards/margins": 6.415966987609863, "rewards/rejected": -6.687320709228516, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -2.52775239944458, "logits/rejected": -2.4429614543914795, "logps/chosen": -280.1533203125, "logps/rejected": -329.3421936035156, "loss": 0.0626, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.26627108454704285, "rewards/margins": 7.076508522033691, "rewards/rejected": -7.342779636383057, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -2.5030529499053955, "logits/rejected": -2.4013702869415283, "logps/chosen": -240.1175537109375, "logps/rejected": -286.1576232910156, "loss": 0.0656, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.5509620904922485, "rewards/margins": 6.837267875671387, "rewards/rejected": -7.388230323791504, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -2.5704593658447266, "logits/rejected": -2.5203540325164795, "logps/chosen": -287.09075927734375, "logps/rejected": -313.4488830566406, "loss": 0.0569, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.18775661289691925, "rewards/margins": 6.396711826324463, "rewards/rejected": -6.584468841552734, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -2.455457925796509, "logits/rejected": -2.402740716934204, "logps/chosen": -269.8533630371094, "logps/rejected": -295.97845458984375, "loss": 0.0409, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4922047257423401, "rewards/margins": 6.239272117614746, "rewards/rejected": -6.7314772605896, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -2.5787675380706787, "logits/rejected": -2.5012478828430176, "logps/chosen": -270.3778381347656, "logps/rejected": -303.08624267578125, "loss": 0.0471, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.5367101430892944, "rewards/margins": 6.749762058258057, "rewards/rejected": -7.286472320556641, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -2.498349189758301, "logits/rejected": -2.4476866722106934, "logps/chosen": -277.01446533203125, "logps/rejected": -307.1866760253906, "loss": 0.0436, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.36937451362609863, "rewards/margins": 6.952645778656006, "rewards/rejected": -7.322020053863525, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -2.5487923622131348, "logits/rejected": -2.4616153240203857, "logps/chosen": -280.83990478515625, "logps/rejected": -283.40008544921875, "loss": 0.0603, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3574257791042328, "rewards/margins": 6.618617057800293, "rewards/rejected": -6.976043701171875, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -2.508967876434326, "logits/rejected": -2.4427499771118164, "logps/chosen": -282.5861511230469, "logps/rejected": -310.12091064453125, "loss": 0.059, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5728462934494019, "rewards/margins": 6.127859115600586, "rewards/rejected": -6.700705051422119, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -2.4674155712127686, "logits/rejected": -2.4133477210998535, "logps/chosen": -274.87060546875, "logps/rejected": -287.4228210449219, "loss": 0.0559, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9308744668960571, "rewards/margins": 6.335507869720459, "rewards/rejected": -7.266383171081543, "step": 1800 }, { "epoch": 1.86, "eval_logits/chosen": -2.5741446018218994, "eval_logits/rejected": -2.4843475818634033, "eval_logps/chosen": -299.6483154296875, "eval_logps/rejected": -282.3105773925781, "eval_loss": 0.5656670928001404, "eval_rewards/accuracies": 0.7519999742507935, "eval_rewards/chosen": -2.580115556716919, "eval_rewards/margins": 2.684171438217163, "eval_rewards/rejected": -5.264286518096924, "eval_runtime": 330.2789, "eval_samples_per_second": 6.055, "eval_steps_per_second": 0.378, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -2.5033411979675293, "logits/rejected": -2.432813882827759, "logps/chosen": -247.942626953125, "logps/rejected": -305.20867919921875, "loss": 0.0516, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.7584810853004456, "rewards/margins": 6.978280067443848, "rewards/rejected": -7.73676061630249, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -2.499147653579712, "logits/rejected": -2.4221835136413574, "logps/chosen": -284.1277160644531, "logps/rejected": -317.18463134765625, "loss": 0.0549, "rewards/accuracies": 0.96875, "rewards/chosen": -0.9464308619499207, "rewards/margins": 6.537860870361328, "rewards/rejected": -7.484292507171631, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -2.519296169281006, "logits/rejected": -2.4646060466766357, "logps/chosen": -279.44268798828125, "logps/rejected": -308.8426513671875, "loss": 0.05, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.49726948142051697, "rewards/margins": 6.457852840423584, "rewards/rejected": -6.9551215171813965, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -2.5203187465667725, "logits/rejected": -2.459474802017212, "logps/chosen": -275.2139892578125, "logps/rejected": -314.4609680175781, "loss": 0.0526, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.601524293422699, "rewards/margins": 6.1087565422058105, "rewards/rejected": -6.7102813720703125, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -2.523761034011841, "logits/rejected": -2.4565975666046143, "logps/chosen": -288.87469482421875, "logps/rejected": -316.4368896484375, "loss": 0.0576, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4725467562675476, "rewards/margins": 6.452563285827637, "rewards/rejected": -6.92510986328125, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -2.49338960647583, "logits/rejected": -2.41786527633667, "logps/chosen": -295.960205078125, "logps/rejected": -308.38922119140625, "loss": 0.0607, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5486451387405396, "rewards/margins": 6.254899024963379, "rewards/rejected": -6.803544044494629, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -2.4441514015197754, "logits/rejected": -2.4188904762268066, "logps/chosen": -280.95782470703125, "logps/rejected": -308.5982360839844, "loss": 0.0612, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.31675153970718384, "rewards/margins": 6.945546627044678, "rewards/rejected": -7.262298583984375, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -2.518660545349121, "logits/rejected": -2.458202838897705, "logps/chosen": -288.21527099609375, "logps/rejected": -335.2455139160156, "loss": 0.0436, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.16030362248420715, "rewards/margins": 6.654341697692871, "rewards/rejected": -6.814645290374756, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -2.5323879718780518, "logits/rejected": -2.4646458625793457, "logps/chosen": -253.74435424804688, "logps/rejected": -308.44464111328125, "loss": 0.0475, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.6830048561096191, "rewards/margins": 6.6035966873168945, "rewards/rejected": -7.286602020263672, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -2.5229687690734863, "logits/rejected": -2.4853615760803223, "logps/chosen": -314.48468017578125, "logps/rejected": -311.384521484375, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -0.8841110467910767, "rewards/margins": 6.407034873962402, "rewards/rejected": -7.291146278381348, "step": 1900 }, { "epoch": 1.96, "eval_logits/chosen": -2.539219379425049, "eval_logits/rejected": -2.449059009552002, "eval_logps/chosen": -298.44427490234375, "eval_logps/rejected": -282.5741882324219, "eval_loss": 0.5758858919143677, "eval_rewards/accuracies": 0.7480000257492065, "eval_rewards/chosen": -2.4597108364105225, "eval_rewards/margins": 2.8309402465820312, "eval_rewards/rejected": -5.290651321411133, "eval_runtime": 329.9748, "eval_samples_per_second": 6.061, "eval_steps_per_second": 0.379, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -2.4514074325561523, "logits/rejected": -2.3297629356384277, "logps/chosen": -287.038330078125, "logps/rejected": -299.0102233886719, "loss": 0.0499, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0671433210372925, "rewards/margins": 5.865691184997559, "rewards/rejected": -6.932834625244141, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -2.4341206550598145, "logits/rejected": -2.371324300765991, "logps/chosen": -282.197998046875, "logps/rejected": -301.5996398925781, "loss": 0.0463, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2245758771896362, "rewards/margins": 6.24572229385376, "rewards/rejected": -7.470297813415527, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -2.438136577606201, "logits/rejected": -2.3593366146087646, "logps/chosen": -286.82855224609375, "logps/rejected": -316.28240966796875, "loss": 0.0489, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0146198272705078, "rewards/margins": 6.5871477127075195, "rewards/rejected": -7.601768493652344, "step": 1930 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -2.383092164993286, "logits/rejected": -2.2963826656341553, "logps/chosen": -294.37200927734375, "logps/rejected": -295.9538879394531, "loss": 0.0617, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.6222116947174072, "rewards/margins": 6.908046722412109, "rewards/rejected": -7.530259132385254, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -2.4570600986480713, "logits/rejected": -2.4170963764190674, "logps/chosen": -294.67333984375, "logps/rejected": -328.25335693359375, "loss": 0.0567, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6761181950569153, "rewards/margins": 6.681130886077881, "rewards/rejected": -7.3572492599487305, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -2.4279465675354004, "logits/rejected": -2.3740410804748535, "logps/chosen": -305.5331726074219, "logps/rejected": -321.54974365234375, "loss": 0.0629, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.977795422077179, "rewards/margins": 6.151275634765625, "rewards/rejected": -7.129071235656738, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -2.4284489154815674, "logits/rejected": -2.3778624534606934, "logps/chosen": -238.752685546875, "logps/rejected": -279.09161376953125, "loss": 0.0655, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1264437437057495, "rewards/margins": 5.814446926116943, "rewards/rejected": -6.940890312194824, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -2.4329612255096436, "logits/rejected": -2.374427080154419, "logps/chosen": -259.0732116699219, "logps/rejected": -254.0245361328125, "loss": 0.0516, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.238419771194458, "rewards/margins": 5.634509086608887, "rewards/rejected": -6.872928619384766, "step": 1980 }, { "epoch": 2.05, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -2.435520648956299, "logits/rejected": -2.4173076152801514, "logps/chosen": -271.8101501464844, "logps/rejected": -295.2347412109375, "loss": 0.0426, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.8866006731987, "rewards/margins": 6.229605674743652, "rewards/rejected": -7.116206169128418, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -2.4372620582580566, "logits/rejected": -2.3834497928619385, "logps/chosen": -262.3973388671875, "logps/rejected": -304.9111022949219, "loss": 0.0576, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2331212759017944, "rewards/margins": 5.813534259796143, "rewards/rejected": -7.046655178070068, "step": 2000 }, { "epoch": 2.07, "eval_logits/chosen": -2.5015718936920166, "eval_logits/rejected": -2.413215160369873, "eval_logps/chosen": -299.8445739746094, "eval_logps/rejected": -282.8996887207031, "eval_loss": 0.5613510608673096, "eval_rewards/accuracies": 0.7620000243186951, "eval_rewards/chosen": -2.5997402667999268, "eval_rewards/margins": 2.7234604358673096, "eval_rewards/rejected": -5.323200702667236, "eval_runtime": 329.9108, "eval_samples_per_second": 6.062, "eval_steps_per_second": 0.379, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -2.474686622619629, "logits/rejected": -2.3782389163970947, "logps/chosen": -324.77459716796875, "logps/rejected": -299.6976623535156, "loss": 0.0394, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.9665130376815796, "rewards/margins": 6.133960723876953, "rewards/rejected": -7.100473880767822, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -2.4789416790008545, "logits/rejected": -2.4072928428649902, "logps/chosen": -298.6253356933594, "logps/rejected": -285.716064453125, "loss": 0.0399, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.0751765966415405, "rewards/margins": 5.9020466804504395, "rewards/rejected": -6.9772233963012695, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -2.4440250396728516, "logits/rejected": -2.360992670059204, "logps/chosen": -285.7022705078125, "logps/rejected": -315.54937744140625, "loss": 0.0295, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.0514458417892456, "rewards/margins": 6.273679733276367, "rewards/rejected": -7.325125694274902, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -2.4839930534362793, "logits/rejected": -2.404003620147705, "logps/chosen": -268.47308349609375, "logps/rejected": -310.1471252441406, "loss": 0.0317, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.2032978534698486, "rewards/margins": 5.9852614402771, "rewards/rejected": -7.188559532165527, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -2.52724289894104, "logits/rejected": -2.4334981441497803, "logps/chosen": -312.8106689453125, "logps/rejected": -323.5155334472656, "loss": 0.0278, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9176046252250671, "rewards/margins": 6.5511651039123535, "rewards/rejected": -7.4687700271606445, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -2.4501614570617676, "logits/rejected": -2.4063010215759277, "logps/chosen": -260.9747314453125, "logps/rejected": -286.7646179199219, "loss": 0.0239, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.3017902374267578, "rewards/margins": 6.399510860443115, "rewards/rejected": -7.701300144195557, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -2.491021156311035, "logits/rejected": -2.484483480453491, "logps/chosen": -273.3662109375, "logps/rejected": -292.72528076171875, "loss": 0.0237, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.281099557876587, "rewards/margins": 6.190659523010254, "rewards/rejected": -7.471758842468262, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -2.4878249168395996, "logits/rejected": -2.434704542160034, "logps/chosen": -300.8520812988281, "logps/rejected": -315.6512756347656, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.3595197200775146, "rewards/margins": 6.689205169677734, "rewards/rejected": -8.048724174499512, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -2.46006441116333, "logits/rejected": -2.346818447113037, "logps/chosen": -302.2090759277344, "logps/rejected": -295.3673095703125, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.1756342649459839, "rewards/margins": 6.565530300140381, "rewards/rejected": -7.7411651611328125, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -2.456608295440674, "logits/rejected": -2.355076313018799, "logps/chosen": -282.78973388671875, "logps/rejected": -310.10906982421875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.484881043434143, "rewards/margins": 7.286185264587402, "rewards/rejected": -8.771065711975098, "step": 2100 }, { "epoch": 2.17, "eval_logits/chosen": -2.5039713382720947, "eval_logits/rejected": -2.4052250385284424, "eval_logps/chosen": -305.83544921875, "eval_logps/rejected": -293.5166320800781, "eval_loss": 0.6182354688644409, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -3.198823928833008, "eval_rewards/margins": 3.186070442199707, "eval_rewards/rejected": -6.384894371032715, "eval_runtime": 330.4934, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.378, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -2.447910785675049, "logits/rejected": -2.3610103130340576, "logps/chosen": -282.10418701171875, "logps/rejected": -316.7162780761719, "loss": 0.0157, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.4366261959075928, "rewards/margins": 7.56749963760376, "rewards/rejected": -9.004125595092773, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -2.4108386039733887, "logits/rejected": -2.3353419303894043, "logps/chosen": -258.7259826660156, "logps/rejected": -306.69049072265625, "loss": 0.0193, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2959132194519043, "rewards/margins": 7.697202205657959, "rewards/rejected": -8.993115425109863, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -2.437985897064209, "logits/rejected": -2.352027416229248, "logps/chosen": -304.39508056640625, "logps/rejected": -358.80987548828125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.8380666971206665, "rewards/margins": 8.16486930847168, "rewards/rejected": -10.002935409545898, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -2.495406150817871, "logits/rejected": -2.417428970336914, "logps/chosen": -327.63592529296875, "logps/rejected": -334.3495788574219, "loss": 0.0151, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.7903884649276733, "rewards/margins": 7.90988302230835, "rewards/rejected": -9.700272560119629, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -2.4637489318847656, "logits/rejected": -2.3969979286193848, "logps/chosen": -277.05206298828125, "logps/rejected": -316.2105712890625, "loss": 0.0135, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.583605170249939, "rewards/margins": 8.034161567687988, "rewards/rejected": -9.617767333984375, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -2.4575247764587402, "logits/rejected": -2.3824923038482666, "logps/chosen": -284.96319580078125, "logps/rejected": -330.1680908203125, "loss": 0.0118, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.8364570140838623, "rewards/margins": 8.434205055236816, "rewards/rejected": -10.270662307739258, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -2.43703031539917, "logits/rejected": -2.3671228885650635, "logps/chosen": -336.13751220703125, "logps/rejected": -354.32635498046875, "loss": 0.0176, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.1960761547088623, "rewards/margins": 8.524799346923828, "rewards/rejected": -9.720874786376953, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -2.3543310165405273, "logits/rejected": -2.3394277095794678, "logps/chosen": -272.6856384277344, "logps/rejected": -339.7895812988281, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.8999465703964233, "rewards/margins": 8.533658981323242, "rewards/rejected": -10.433606147766113, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -2.386059522628784, "logits/rejected": -2.2526228427886963, "logps/chosen": -311.24481201171875, "logps/rejected": -337.0218811035156, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.3375542163848877, "rewards/margins": 9.32975959777832, "rewards/rejected": -10.667314529418945, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -2.4342336654663086, "logits/rejected": -2.3418126106262207, "logps/chosen": -309.2164611816406, "logps/rejected": -374.22576904296875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.8751029968261719, "rewards/margins": 9.406895637512207, "rewards/rejected": -11.281997680664062, "step": 2200 }, { "epoch": 2.27, "eval_logits/chosen": -2.4494409561157227, "eval_logits/rejected": -2.353502035140991, "eval_logps/chosen": -319.8072204589844, "eval_logps/rejected": -311.62286376953125, "eval_loss": 0.7074651122093201, "eval_rewards/accuracies": 0.7419999837875366, "eval_rewards/chosen": -4.59600305557251, "eval_rewards/margins": 3.599517345428467, "eval_rewards/rejected": -8.195520401000977, "eval_runtime": 330.3156, "eval_samples_per_second": 6.055, "eval_steps_per_second": 0.378, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -2.337085247039795, "logits/rejected": -2.296482563018799, "logps/chosen": -290.27789306640625, "logps/rejected": -344.79730224609375, "loss": 0.0096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.79486083984375, "rewards/margins": 9.907821655273438, "rewards/rejected": -11.702681541442871, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -2.3764395713806152, "logits/rejected": -2.3238234519958496, "logps/chosen": -271.51763916015625, "logps/rejected": -333.38580322265625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.068006992340088, "rewards/margins": 9.185879707336426, "rewards/rejected": -11.253885269165039, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -2.3385708332061768, "logits/rejected": -2.2395923137664795, "logps/chosen": -283.2237854003906, "logps/rejected": -340.7456970214844, "loss": 0.0137, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.4410890340805054, "rewards/margins": 9.802583694458008, "rewards/rejected": -11.243673324584961, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -2.3051886558532715, "logits/rejected": -2.232696771621704, "logps/chosen": -284.79730224609375, "logps/rejected": -361.62896728515625, "loss": 0.0135, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.2478265762329102, "rewards/margins": 9.604982376098633, "rewards/rejected": -10.85280990600586, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -2.236691474914551, "logits/rejected": -2.1974494457244873, "logps/chosen": -269.29443359375, "logps/rejected": -338.17822265625, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1111952066421509, "rewards/margins": 9.45576000213623, "rewards/rejected": -10.566953659057617, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -2.350980520248413, "logits/rejected": -2.2944483757019043, "logps/chosen": -288.3419494628906, "logps/rejected": -323.5022277832031, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.3432337045669556, "rewards/margins": 9.604742050170898, "rewards/rejected": -10.947977066040039, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -2.3275721073150635, "logits/rejected": -2.25993013381958, "logps/chosen": -260.2298889160156, "logps/rejected": -314.8516845703125, "loss": 0.0103, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.1889066696166992, "rewards/margins": 8.959939956665039, "rewards/rejected": -10.148846626281738, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -2.3323237895965576, "logits/rejected": -2.2373764514923096, "logps/chosen": -270.145751953125, "logps/rejected": -313.8666076660156, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.1815932989120483, "rewards/margins": 9.265039443969727, "rewards/rejected": -10.446632385253906, "step": 2280 }, { "epoch": 2.36, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -2.3373894691467285, "logits/rejected": -2.2577757835388184, "logps/chosen": -290.20556640625, "logps/rejected": -351.8134765625, "loss": 0.0088, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.011973261833191, "rewards/margins": 9.961956977844238, "rewards/rejected": -10.973930358886719, "step": 2290 }, { "epoch": 2.37, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -2.344902515411377, "logits/rejected": -2.2504470348358154, "logps/chosen": -266.8149719238281, "logps/rejected": -353.9951477050781, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.1190001964569092, "rewards/margins": 10.073897361755371, "rewards/rejected": -11.192898750305176, "step": 2300 }, { "epoch": 2.37, "eval_logits/chosen": -2.397207736968994, "eval_logits/rejected": -2.2942862510681152, "eval_logps/chosen": -315.9493408203125, "eval_logps/rejected": -307.45587158203125, "eval_loss": 0.7116624712944031, "eval_rewards/accuracies": 0.7540000081062317, "eval_rewards/chosen": -4.210216999053955, "eval_rewards/margins": 3.5686044692993164, "eval_rewards/rejected": -7.778822422027588, "eval_runtime": 329.6201, "eval_samples_per_second": 6.068, "eval_steps_per_second": 0.379, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -2.30245304107666, "logits/rejected": -2.233797073364258, "logps/chosen": -265.81402587890625, "logps/rejected": -341.81341552734375, "loss": 0.0119, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4100592136383057, "rewards/margins": 10.045531272888184, "rewards/rejected": -11.455589294433594, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -2.3736674785614014, "logits/rejected": -2.2465882301330566, "logps/chosen": -302.5309143066406, "logps/rejected": -323.94378662109375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.3777319192886353, "rewards/margins": 9.126462936401367, "rewards/rejected": -10.504194259643555, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -2.2658305168151855, "logits/rejected": -2.225450038909912, "logps/chosen": -304.5056457519531, "logps/rejected": -348.7767639160156, "loss": 0.0114, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.0617547035217285, "rewards/margins": 9.54466724395752, "rewards/rejected": -10.60642147064209, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -2.2534327507019043, "logits/rejected": -2.2105424404144287, "logps/chosen": -306.9606018066406, "logps/rejected": -354.7109375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.238394021987915, "rewards/margins": 9.64238166809082, "rewards/rejected": -10.880776405334473, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -2.3249001502990723, "logits/rejected": -2.2780890464782715, "logps/chosen": -268.6153564453125, "logps/rejected": -346.42047119140625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.2436628341674805, "rewards/margins": 9.205244064331055, "rewards/rejected": -10.448905944824219, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -2.3749475479125977, "logits/rejected": -2.277677059173584, "logps/chosen": -300.11895751953125, "logps/rejected": -348.1705017089844, "loss": 0.0061, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.0702159404754639, "rewards/margins": 9.171914100646973, "rewards/rejected": -10.2421293258667, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -2.2845263481140137, "logits/rejected": -2.197556972503662, "logps/chosen": -277.10174560546875, "logps/rejected": -324.47216796875, "loss": 0.01, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.2265087366104126, "rewards/margins": 9.168752670288086, "rewards/rejected": -10.395261764526367, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -2.3598082065582275, "logits/rejected": -2.265746593475342, "logps/chosen": -294.85296630859375, "logps/rejected": -352.6213073730469, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.5622330904006958, "rewards/margins": 9.054221153259277, "rewards/rejected": -10.616453170776367, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -2.3179988861083984, "logits/rejected": -2.2776644229888916, "logps/chosen": -301.02130126953125, "logps/rejected": -350.03765869140625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.5659319162368774, "rewards/margins": 9.565340995788574, "rewards/rejected": -11.13127326965332, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -2.3582870960235596, "logits/rejected": -2.267897367477417, "logps/chosen": -298.76849365234375, "logps/rejected": -371.87237548828125, "loss": 0.0104, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.6058155298233032, "rewards/margins": 9.32790470123291, "rewards/rejected": -10.93371868133545, "step": 2400 }, { "epoch": 2.48, "eval_logits/chosen": -2.409748077392578, "eval_logits/rejected": -2.3097164630889893, "eval_logps/chosen": -317.2179870605469, "eval_logps/rejected": -308.91986083984375, "eval_loss": 0.7131382822990417, "eval_rewards/accuracies": 0.7540000081062317, "eval_rewards/chosen": -4.337080955505371, "eval_rewards/margins": 3.5881383419036865, "eval_rewards/rejected": -7.92521858215332, "eval_runtime": 330.8382, "eval_samples_per_second": 6.045, "eval_steps_per_second": 0.378, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -2.34238862991333, "logits/rejected": -2.2983975410461426, "logps/chosen": -308.1969299316406, "logps/rejected": -335.4678955078125, "loss": 0.008, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.8870359659194946, "rewards/margins": 9.149595260620117, "rewards/rejected": -11.03663158416748, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -2.4131252765655518, "logits/rejected": -2.2457823753356934, "logps/chosen": -327.6688537597656, "logps/rejected": -347.8465576171875, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6184027194976807, "rewards/margins": 9.20851993560791, "rewards/rejected": -10.826921463012695, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -2.3476271629333496, "logits/rejected": -2.217395067214966, "logps/chosen": -293.73785400390625, "logps/rejected": -336.8843994140625, "loss": 0.0077, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.0833864212036133, "rewards/margins": 9.448251724243164, "rewards/rejected": -11.531637191772461, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -2.3457190990448, "logits/rejected": -2.294241428375244, "logps/chosen": -289.23876953125, "logps/rejected": -358.06390380859375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4211084842681885, "rewards/margins": 9.614474296569824, "rewards/rejected": -11.035581588745117, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -2.321498394012451, "logits/rejected": -2.2159245014190674, "logps/chosen": -276.77105712890625, "logps/rejected": -323.2955627441406, "loss": 0.0122, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.7652084827423096, "rewards/margins": 8.866913795471191, "rewards/rejected": -10.632121086120605, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -2.3643240928649902, "logits/rejected": -2.2679874897003174, "logps/chosen": -282.6573181152344, "logps/rejected": -344.65411376953125, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9658873081207275, "rewards/margins": 9.22823715209961, "rewards/rejected": -11.194124221801758, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -2.252459764480591, "logits/rejected": -2.1973116397857666, "logps/chosen": -299.83819580078125, "logps/rejected": -377.33172607421875, "loss": 0.0138, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.9341270923614502, "rewards/margins": 9.789133071899414, "rewards/rejected": -11.723260879516602, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -2.3481366634368896, "logits/rejected": -2.285489320755005, "logps/chosen": -295.62939453125, "logps/rejected": -349.37347412109375, "loss": 0.0124, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7781429290771484, "rewards/margins": 9.523946762084961, "rewards/rejected": -11.302090644836426, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -2.2330024242401123, "logits/rejected": -2.1658129692077637, "logps/chosen": -266.7092590332031, "logps/rejected": -324.52203369140625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.702344298362732, "rewards/margins": 9.572081565856934, "rewards/rejected": -11.274426460266113, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -2.310683488845825, "logits/rejected": -2.223453998565674, "logps/chosen": -284.54583740234375, "logps/rejected": -350.00469970703125, "loss": 0.008, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.8044601678848267, "rewards/margins": 9.359917640686035, "rewards/rejected": -11.164377212524414, "step": 2500 }, { "epoch": 2.58, "eval_logits/chosen": -2.376404047012329, "eval_logits/rejected": -2.2755658626556396, "eval_logps/chosen": -318.2084045410156, "eval_logps/rejected": -311.3636474609375, "eval_loss": 0.7328027486801147, "eval_rewards/accuracies": 0.7519999742507935, "eval_rewards/chosen": -4.436122894287109, "eval_rewards/margins": 3.7334771156311035, "eval_rewards/rejected": -8.169599533081055, "eval_runtime": 330.2894, "eval_samples_per_second": 6.055, "eval_steps_per_second": 0.378, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -2.2334113121032715, "logits/rejected": -2.171754837036133, "logps/chosen": -278.3266906738281, "logps/rejected": -326.1250915527344, "loss": 0.0105, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.8271163702011108, "rewards/margins": 9.274404525756836, "rewards/rejected": -11.101519584655762, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -2.2726237773895264, "logits/rejected": -2.2070040702819824, "logps/chosen": -294.99591064453125, "logps/rejected": -365.6657409667969, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.223010778427124, "rewards/margins": 9.6091890335083, "rewards/rejected": -11.832199096679688, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -2.232114553451538, "logits/rejected": -2.1668314933776855, "logps/chosen": -302.2503662109375, "logps/rejected": -340.13568115234375, "loss": 0.0112, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.7910659313201904, "rewards/margins": 9.791200637817383, "rewards/rejected": -11.582267761230469, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -2.3309519290924072, "logits/rejected": -2.1995091438293457, "logps/chosen": -290.02789306640625, "logps/rejected": -338.7962341308594, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.841522216796875, "rewards/margins": 9.228216171264648, "rewards/rejected": -11.069738388061523, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -2.3205971717834473, "logits/rejected": -2.2115180492401123, "logps/chosen": -326.6658020019531, "logps/rejected": -337.70684814453125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.852582573890686, "rewards/margins": 9.197256088256836, "rewards/rejected": -11.049838066101074, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -2.294541835784912, "logits/rejected": -2.1916394233703613, "logps/chosen": -262.17401123046875, "logps/rejected": -323.60498046875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.84153151512146, "rewards/margins": 9.305184364318848, "rewards/rejected": -11.14671516418457, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -2.3827064037323, "logits/rejected": -2.2606282234191895, "logps/chosen": -295.3243408203125, "logps/rejected": -343.00860595703125, "loss": 0.0082, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.9150327444076538, "rewards/margins": 9.581887245178223, "rewards/rejected": -11.496919631958008, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -2.2721750736236572, "logits/rejected": -2.233607292175293, "logps/chosen": -286.4488220214844, "logps/rejected": -361.966552734375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.6838099956512451, "rewards/margins": 10.286967277526855, "rewards/rejected": -11.97077751159668, "step": 2580 }, { "epoch": 2.67, "learning_rate": 6.008419441255262e-08, "logits/chosen": -2.2622904777526855, "logits/rejected": -2.212773084640503, "logps/chosen": -274.4309997558594, "logps/rejected": -340.4100646972656, "loss": 0.0179, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.554031252861023, "rewards/margins": 9.818443298339844, "rewards/rejected": -11.372475624084473, "step": 2590 }, { "epoch": 2.68, "learning_rate": 5.817068503635668e-08, "logits/chosen": -2.3351638317108154, "logits/rejected": -2.226699113845825, "logps/chosen": -269.5257568359375, "logps/rejected": -310.54815673828125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.2515082359313965, "rewards/margins": 8.843351364135742, "rewards/rejected": -11.094860076904297, "step": 2600 }, { "epoch": 2.68, "eval_logits/chosen": -2.418520927429199, "eval_logits/rejected": -2.3138442039489746, "eval_logps/chosen": -316.7310791015625, "eval_logps/rejected": -309.56011962890625, "eval_loss": 0.7192761898040771, "eval_rewards/accuracies": 0.7599999904632568, "eval_rewards/chosen": -4.288391590118408, "eval_rewards/margins": 3.700854778289795, "eval_rewards/rejected": -7.989245891571045, "eval_runtime": 330.2015, "eval_samples_per_second": 6.057, "eval_steps_per_second": 0.379, "step": 2600 }, { "epoch": 2.69, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -2.3566582202911377, "logits/rejected": -2.2767062187194824, "logps/chosen": -286.93414306640625, "logps/rejected": -343.0436706542969, "loss": 0.0041, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.3818962574005127, "rewards/margins": 9.171338081359863, "rewards/rejected": -10.553235054016113, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -2.4049510955810547, "logits/rejected": -2.256659746170044, "logps/chosen": -307.3303527832031, "logps/rejected": -365.7108459472656, "loss": 0.0107, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.3092598915100098, "rewards/margins": 9.412524223327637, "rewards/rejected": -10.721784591674805, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -2.4375126361846924, "logits/rejected": -2.30391001701355, "logps/chosen": -343.348388671875, "logps/rejected": -355.0252990722656, "loss": 0.0072, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.578098177909851, "rewards/margins": 9.311563491821289, "rewards/rejected": -10.88966178894043, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -2.3526530265808105, "logits/rejected": -2.2642931938171387, "logps/chosen": -269.9921875, "logps/rejected": -338.1278381347656, "loss": 0.0048, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.4594029188156128, "rewards/margins": 9.540541648864746, "rewards/rejected": -10.999944686889648, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -2.3303122520446777, "logits/rejected": -2.2322537899017334, "logps/chosen": -304.40924072265625, "logps/rejected": -339.898681640625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.252328395843506, "rewards/margins": 9.010406494140625, "rewards/rejected": -11.262735366821289, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -2.300126552581787, "logits/rejected": -2.1978871822357178, "logps/chosen": -281.3654479980469, "logps/rejected": -307.08197021484375, "loss": 0.0129, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.039625644683838, "rewards/margins": 8.738824844360352, "rewards/rejected": -10.778450012207031, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -2.3153433799743652, "logits/rejected": -2.2383222579956055, "logps/chosen": -313.5261535644531, "logps/rejected": -362.248779296875, "loss": 0.0111, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -2.031290054321289, "rewards/margins": 9.838370323181152, "rewards/rejected": -11.869660377502441, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -2.3045172691345215, "logits/rejected": -2.2299160957336426, "logps/chosen": -275.14276123046875, "logps/rejected": -345.14617919921875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.9210354089736938, "rewards/margins": 9.726446151733398, "rewards/rejected": -11.647480964660645, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -2.360119104385376, "logits/rejected": -2.2811567783355713, "logps/chosen": -291.0294494628906, "logps/rejected": -362.3855285644531, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.178312063217163, "rewards/margins": 9.444883346557617, "rewards/rejected": -11.623196601867676, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -2.2991082668304443, "logits/rejected": -2.2151143550872803, "logps/chosen": -280.21710205078125, "logps/rejected": -339.8370056152344, "loss": 0.0089, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.628218173980713, "rewards/margins": 10.084501266479492, "rewards/rejected": -12.712719917297363, "step": 2700 }, { "epoch": 2.79, "eval_logits/chosen": -2.395979881286621, "eval_logits/rejected": -2.2942333221435547, "eval_logps/chosen": -322.83795166015625, "eval_logps/rejected": -316.2196044921875, "eval_loss": 0.738807737827301, "eval_rewards/accuracies": 0.765999972820282, "eval_rewards/chosen": -4.899077892303467, "eval_rewards/margins": 3.7561144828796387, "eval_rewards/rejected": -8.655191421508789, "eval_runtime": 330.5088, "eval_samples_per_second": 6.051, "eval_steps_per_second": 0.378, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -2.360029935836792, "logits/rejected": -2.322143077850342, "logps/chosen": -303.7354736328125, "logps/rejected": -367.031005859375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.0700087547302246, "rewards/margins": 9.75294017791748, "rewards/rejected": -11.822949409484863, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -2.272301197052002, "logits/rejected": -2.2256054878234863, "logps/chosen": -273.448486328125, "logps/rejected": -336.57305908203125, "loss": 0.0366, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.192540168762207, "rewards/margins": 9.447149276733398, "rewards/rejected": -11.639688491821289, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -2.3102471828460693, "logits/rejected": -2.2527964115142822, "logps/chosen": -293.8779602050781, "logps/rejected": -362.58563232421875, "loss": 0.0113, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -2.485677719116211, "rewards/margins": 9.276286125183105, "rewards/rejected": -11.761963844299316, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -2.342339038848877, "logits/rejected": -2.276078462600708, "logps/chosen": -300.9256286621094, "logps/rejected": -342.9737243652344, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.9704710245132446, "rewards/margins": 9.322904586791992, "rewards/rejected": -11.293375015258789, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -2.323369264602661, "logits/rejected": -2.2281460762023926, "logps/chosen": -298.3914794921875, "logps/rejected": -334.6033020019531, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.0311081409454346, "rewards/margins": 9.329480171203613, "rewards/rejected": -11.360588073730469, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -2.251509189605713, "logits/rejected": -2.2324776649475098, "logps/chosen": -290.03106689453125, "logps/rejected": -339.38238525390625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -2.435882091522217, "rewards/margins": 9.06352710723877, "rewards/rejected": -11.499407768249512, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -2.2469379901885986, "logits/rejected": -2.1791176795959473, "logps/chosen": -287.87298583984375, "logps/rejected": -333.5580139160156, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.3411865234375, "rewards/margins": 9.410177230834961, "rewards/rejected": -11.751363754272461, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -2.291459560394287, "logits/rejected": -2.226825475692749, "logps/chosen": -268.06280517578125, "logps/rejected": -356.3489990234375, "loss": 0.0061, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.386735677719116, "rewards/margins": 9.646692276000977, "rewards/rejected": -12.033427238464355, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -2.290907382965088, "logits/rejected": -2.2021796703338623, "logps/chosen": -306.6482849121094, "logps/rejected": -370.08038330078125, "loss": 0.005, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.622554302215576, "rewards/margins": 9.027674674987793, "rewards/rejected": -11.650228500366211, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -2.3071415424346924, "logits/rejected": -2.198819637298584, "logps/chosen": -302.01904296875, "logps/rejected": -338.0803527832031, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.463911294937134, "rewards/margins": 9.179804801940918, "rewards/rejected": -11.643716812133789, "step": 2800 }, { "epoch": 2.89, "eval_logits/chosen": -2.364900588989258, "eval_logits/rejected": -2.2619574069976807, "eval_logps/chosen": -321.83087158203125, "eval_logps/rejected": -316.2637939453125, "eval_loss": 0.7342348694801331, "eval_rewards/accuracies": 0.7639999985694885, "eval_rewards/chosen": -4.798369407653809, "eval_rewards/margins": 3.8612406253814697, "eval_rewards/rejected": -8.659610748291016, "eval_runtime": 330.2098, "eval_samples_per_second": 6.057, "eval_steps_per_second": 0.379, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -2.27158784866333, "logits/rejected": -2.264709949493408, "logps/chosen": -289.5348205566406, "logps/rejected": -370.21295166015625, "loss": 0.0065, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.4264960289001465, "rewards/margins": 8.935715675354004, "rewards/rejected": -11.362211227416992, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -2.2701802253723145, "logits/rejected": -2.182992458343506, "logps/chosen": -302.4941101074219, "logps/rejected": -356.8041687011719, "loss": 0.0059, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.402383804321289, "rewards/margins": 8.9962739944458, "rewards/rejected": -11.398656845092773, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -2.266104221343994, "logits/rejected": -2.209751605987549, "logps/chosen": -311.89373779296875, "logps/rejected": -364.10321044921875, "loss": 0.0079, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.8278541564941406, "rewards/margins": 9.237452507019043, "rewards/rejected": -11.06530475616455, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -2.2711033821105957, "logits/rejected": -2.213872194290161, "logps/chosen": -296.39361572265625, "logps/rejected": -334.15985107421875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.3090121746063232, "rewards/margins": 9.236477851867676, "rewards/rejected": -11.545488357543945, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -2.3495352268218994, "logits/rejected": -2.28285813331604, "logps/chosen": -316.610595703125, "logps/rejected": -394.0874938964844, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.016587495803833, "rewards/margins": 9.51962947845459, "rewards/rejected": -11.536214828491211, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -2.369030237197876, "logits/rejected": -2.2844557762145996, "logps/chosen": -275.51312255859375, "logps/rejected": -343.92144775390625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.705946445465088, "rewards/margins": 8.749194145202637, "rewards/rejected": -11.455141067504883, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -2.3274118900299072, "logits/rejected": -2.265143632888794, "logps/chosen": -333.2660827636719, "logps/rejected": -350.2222900390625, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.5637712478637695, "rewards/margins": 8.87217903137207, "rewards/rejected": -11.435951232910156, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -2.3058080673217773, "logits/rejected": -2.24003267288208, "logps/chosen": -289.5403747558594, "logps/rejected": -346.329345703125, "loss": 0.0151, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.4935154914855957, "rewards/margins": 8.715460777282715, "rewards/rejected": -11.208975791931152, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -2.305800437927246, "logits/rejected": -2.183337926864624, "logps/chosen": -303.07989501953125, "logps/rejected": -338.078125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.6788885593414307, "rewards/margins": 8.81675910949707, "rewards/rejected": -11.495648384094238, "step": 2890 }, { "epoch": 2.99, "learning_rate": 7.654037504783773e-10, "logits/chosen": -2.2422897815704346, "logits/rejected": -2.167163848876953, "logps/chosen": -308.4017639160156, "logps/rejected": -364.2879333496094, "loss": 0.0094, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.076625347137451, "rewards/margins": 9.432092666625977, "rewards/rejected": -11.50871753692627, "step": 2900 }, { "epoch": 2.99, "eval_logits/chosen": -2.3625237941741943, "eval_logits/rejected": -2.259509325027466, "eval_logps/chosen": -321.42047119140625, "eval_logps/rejected": -315.8360595703125, "eval_loss": 0.7374239563941956, "eval_rewards/accuracies": 0.7580000162124634, "eval_rewards/chosen": -4.75732946395874, "eval_rewards/margins": 3.859508514404297, "eval_rewards/rejected": -8.616838455200195, "eval_runtime": 329.7799, "eval_samples_per_second": 6.065, "eval_steps_per_second": 0.379, "step": 2900 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.20145473461829064, "train_runtime": 66730.9037, "train_samples_per_second": 2.786, "train_steps_per_second": 0.044 } ], "logging_steps": 10, "max_steps": 2904, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }